# ## Clear R-workspace
# rm(list=ls(all=TRUE))
## Close all graphic devices
graphics.off()
#####################
### Load packages ###
#####################
library(pacman)
pacman::p_load(extrafont,DT,dplyr,tibble,ggplot2, hrbrthemes, grid, gridExtra, cowplot,Cairo,
tidyverse,ggpubr, ggrepel,cowplot,
RColorBrewer, pander,gridExtra, survival, survminer, caret, precrec,
reconPlots,flextable, extrafont,readxl, reshape2,rstatix,ggbeeswarm,
cowplot,tidyquant,zoo,plyr,xlsx,corrr,ggnet,ggnetwork,
tidygraph,ggraph,intergraph,clusterProfiler,AnnotationDbi,org.Hs.eg.db,rrvgo,data.table,GO.db,multienrichjam, DOSE, openxlsx,GOfuncR, officer)
extrafont::loadfonts(device="win")
windowsFonts(sans="Palatino Linotype")
loadfonts(device="win")
loadfonts(device="postscript")
###########################
### Main analysis paths ###
###########################
# Main
scriptsPath <- paste0("scripts/")
scriptsFunctionsPath <- paste0(scriptsPath,"functions/")
projectDataPath <- paste0("data/")
# Input
tcgaInputData <- paste0(projectDataPath,"TCGA/")
antiPDL1publicInputData <- paste0(projectDataPath,"antiPD(L)1_public/")
otherInputData <- paste0(projectDataPath,"other/")
referenceInputData <- paste0(projectDataPath,"reference/")
# Output
projectOutDataPath <- paste0("output/data_files/")
tcgaIntermediateData <- paste0(projectOutDataPath,"TCGA/")
antiPDL1publicIntermediateData <- paste0(projectOutDataPath,"antiPD(L)1_public/")
# Output for differential expression analysis and gene ontology (similarity)
DEA_GOoutData <- paste0(projectOutDataPath,"DEA_GO/")
# Output for survival analysis
survivalOutData <- paste0(projectOutDataPath,"survival/")
figuresPath <- paste0("output/figures/")
tablesPath <- paste0("output/tables/")
modelsPath <- paste0("output/models/")
# Session/dependencies
sessionInfoPath <- paste0("session_info/")
# Other intermediate output
testDataOut <-paste0(modelsPath,"test_data/")
rocCompareOut <-paste0(modelsPath,"roc_compare/")
modelsInterm <- paste0(modelsPath,"models_interm_files/")
##################################
### LOAD SOURCE FUNCTIONS FILE ###
##################################
source(paste0(scriptsFunctionsPath,"paper_figures_tables_files_functions.R"))
#####################
### File suffixes ###
#####################
Rdata.suffix <- ".RData"
# Other script variables
modelTypeA <- "_TCRArichnessIGH.ds"
modelTypeB <- "_noLIU"
modelTypeC <- "_mintcr4bcr10"
## Other script parameters ##
runFiltSim <- TRUE
geneid2symb <- read.table(file.path(paste0(referenceInputData, "genID2name.csv")), header = TRUE, sep = ",")
geneid2symb$gene_id <- as.character(geneid2symb$gene_id)
geneid2symb$gene_name <- as.character(geneid2symb$gene_name)
tcgaData.tp <- loadRData(paste0(tcgaIntermediateData, "tcga.tp.Filt.final",Rdata.suffix))
# tcga.drug.notICI <-loadRData(paste0(tcgaIntermediateData, "tcga.tp.nonFilt.final_ICI4Tissues.RData")) # NOT USED
tcga.drug.ici.To.Exclude2 <- loadRData(paste0(tcgaIntermediateData, "tcga_patientsNOTici_toEXCLUDE_4Tissues.RData"))
# ###################
# # Compare with init TCGA
# tcgaData.tp.init <- loadRData(paste0("C:/Users/aimilia/BIOINF/1_DATA/3_TCGA/ClinicalDataTables/RData/", "tcga.tp.Filt.final",Rdata.suffix))
#
# median(tcgaData.tp.init$TIS_sigscore, na.rm = T)
# median(tcgaData.tp$TIS_sigscore, na.rm = T)
#
# median(tcgaData.tp.init$BCR_Richness, na.rm = T)
# median(tcgaData.tp$BCR_Richness, na.rm = T)
##################
table(tcgaData.tp$project)
##
## ACC BLCA BRCA BRCA_Her2 BRCA_LuminalA
## 92 412 1099 82 568
## BRCA_LuminalB BRCA_Normal BRCA_TN CESC CHOL
## 217 140 191 308 45
## COAD COAD_MSI COAD_MSI_low COAD_MSS DLBC
## 461 78 80 275 50
## ESCA GBM HNSC KICH KIRC
## 185 611 528 113 537
## KIRP LAML LGG LIHC LUAD
## 291 200 516 377 583
## LUSC MESO OV PAAD PCPG
## 504 87 606 185 179
## PRAD READ SARC SKCM STAD
## 500 171 261 470 443
## TGCT THCA THYM UCEC UCS
## 150 507 124 560 57
## UVM
## 80
# table(tcga.drug.notICI$project) # NOT USED
# See which patients we need to remove
tcgaData.tp %>% dplyr::filter(bcr_patient_barcode %in% tcga.drug.ici.To.Exclude2) %>% droplevels() %>% distinct() %>% dplyr::select(bcr_patient_barcode, project) %>% distinct()
## bcr_patient_barcode project
## 1: TCGA-DK-AA77 BLCA
## 2: TCGA-ER-A2NF SKCM
## 3: TCGA-GN-A8LN SKCM
## 4: TCGA-D9-A148 SKCM
## 5: TCGA-DA-A1I1 SKCM
## 6: TCGA-DA-A3F2 SKCM
## 7: TCGA-DA-A3F5 SKCM
## 8: TCGA-EE-A2GS SKCM
## 9: TCGA-EE-A3JI SKCM
## 10: TCGA-ER-A2NG SKCM
## 11: TCGA-FR-A3YN SKCM
## 12: TCGA-FR-A3YO SKCM
## 13: TCGA-FR-A69P SKCM
## 14: TCGA-FR-A7U9 SKCM
## 15: TCGA-FR-A8YD SKCM
## 16: TCGA-GN-A4U4 SKCM
## 17: TCGA-GN-A4U9 SKCM
## 18: TCGA-GN-A8LK SKCM
## 19: TCGA-QB-AA9O SKCM
## 20: TCGA-WE-A8K5 SKCM
## 21: TCGA-WE-AAA0 SKCM
## 22: TCGA-WE-AAA4 SKCM
## bcr_patient_barcode project
# Remove patients that received aPD1 immunotherapy.
tcgaData.tp.clean <- tcgaData.tp %>% dplyr::filter(bcr_patient_barcode %notin% tcga.drug.ici.To.Exclude2) %>%
droplevels() %>%
distinct()
dim(tcgaData.tp)
## [1] 12923 123
dim(tcgaData.tp.clean)
## [1] 12901 123
featuresSelected <- c("bcr_patient_barcode", "project","PD_L1","logTMB","BCR_Richness","TCR_Richness",
"B_cells.CIBER", "T_Cells_CD8.CIBER", "CD4cells.CIBER","TIS_sigscore", "TumorPurity","OS","OS.time","PFI","PFI.time","age_at_initial_pathologic_diagnosis","gender")
extraFeatures <- c("StromalScore", "ImmuneScore")
featuresIn <- c("TCR_Richness","BCR_Richness","logTMB","PD_L1","TIS_sigscore",
"CD8cells", "CD4cells","B_cells", "TumorPurity")
featuresInLabels <- c("TCR Richness","BCR Richness","logTMB","PD-L1 expression","T-cell inflamed GEP", "CD8+ T-cells infiltration","CD4+ T-cells infiltration","B cells infiltration",
"Tumor Purity")
## FOR TP data
tcgaData.tp.sub <- tcgaData.tp.clean %>% dplyr::select(all_of(featuresSelected)) %>% droplevels() %>% distinct()
tcgaData.tp.sub.extra <- tcgaData.tp.clean %>% dplyr::select(all_of(c(featuresSelected,extraFeatures))) %>% droplevels() %>% distinct()
colnames(tcgaData.tp.sub)[7:9] <- gsub(".CIBER","",colnames(tcgaData.tp.sub)[7:9])
colnames(tcgaData.tp.sub)[8] <- "CD8cells"
# Keep rows where at least on of the features of interested is not NA
tcgaData.tp.sub <-tcgaData.tp.sub[rowSums(is.na(tcgaData.tp.sub[,3:11])) != 9,]
tcgaData.tp.sub.extra <-tcgaData.tp.sub.extra[rowSums(is.na(tcgaData.tp.sub.extra[,3:11])) != 9,]
dim(tcgaData.tp.sub)
## [1] 12385 17
dim(tcgaData.tp.sub.extra)
## [1] 12385 19
mycancertypes <- levels(as.factor(tcgaData.tp.sub$project))
## Exclude THYM and DLBC
mycancertypes.reduced <- mycancertypes[-which(mycancertypes %in% c("THYM", "DLBC","COAD_MSI_low","BRCA_Her2","BRCA_LuminalA", "BRCA_LuminalB","BRCA","BRCA_Normal","COAD","UCEC","LAML"))]
## TCGA histologies aligned with aPD1 data
mycancertypes.final <- c("SKCM", "KIRC","BLCA","STAD")
## Subset data to reduced types
tcgaData.tp.sub.filt <- tcgaData.tp.sub %>% dplyr::filter(project %in% mycancertypes.reduced) %>% droplevels()
tcgaData.tp.sub.extra.filt<- tcgaData.tp.sub.extra %>% dplyr::filter(project %in% mycancertypes.reduced) %>% droplevels()
# Make project a factor with levels
tcgaData.tp.sub.filt$project <- factor(tcgaData.tp.sub.filt$project, levels=mycancertypes.reduced)
tcgaData.tp.sub.extra.filt$project <- factor(tcgaData.tp.sub.extra.filt$project, levels=mycancertypes.reduced)
#CHECK DUPLICATED PATIENTS
which(duplicated(tcgaData.tp.sub.filt$bcr_patient_barcode))
## integer(0)
tcgaData.tp.sub.filt[tcgaData.tp.sub.filt$bcr_patient_barcode==tcgaData.tp.sub.filt$bcr_patient_barcode[125],]
## bcr_patient_barcode project PD_L1 logTMB BCR_Richness TCR_Richness
## 1: TCGA-BT-A2LD BLCA 10.89251 1.282085 2 0
## B_cells CD8cells CD4cells TIS_sigscore TumorPurity OS OS.time PFI
## 1: 0.1571038 0.1523094 0.006754054 -0.7454874 0.9118676 1 623 1
## PFI.time age_at_initial_pathologic_diagnosis gender
## 1: 555 78 FEMALE
tcgaData.tp.sub.filt$OS.time <-tcgaData.tp.sub.filt$OS.time/(365.24/12)
## ADD OBJECTIVE RESPONSE DATA
## ORR data
orrDf <- read_excel(paste0(otherInputData,"ORR_TMB_table.xlsx"),sheet = 2)
orrDf <- as.data.frame(orrDf[,c(1:4,9:10)])
colnames(orrDf) <- c("TCGA_ca", "PD1L1_responders","PD1L1_totalTreated", "ORR", "project","Additional_info")
# Select only lines with complete info in first 5 columns
orrDf_final <- orrDf[complete.cases(orrDf[,1:5]),]
# Change name for COAD MSI &MSS
orrDf_final$project[7:8] <- c("COAD_MSI", "COAD_MSS")
# UPDATED ONE
orrDf2 <- read_excel(paste0(otherInputData,"ORR new TABLE.xlsx"),sheet = 2)
orrDf2 <- as.data.frame(orrDf2[,c(1:4,9:10)])
colnames(orrDf2) <- c("TCGA_ca", "PD1L1_responders","PD1L1_totalTreated", "ORR", "project","Additional_info")
# Select only lines with complete info in first 5 columns
orrDf_final2 <- orrDf2[complete.cases(orrDf2[,1:5]),]
orrDf_final2$project <-str_replace(orrDf_final2$project," ","_")
## For common cancers, keep the updated ones and add the missing ones from old one.
## Common cancers
intersect(orrDf_final$project,orrDf_final2$project)
## [1] "ACC" "BLCA" "BRCA_TN" "CESC" "CHOL" "COAD_MSI"
## [7] "COAD_MSS" "UCEC" "ESCA" "TGCT" "GBM" "HNSC"
## [13] "LIHC" "SKCM" "MESO" "LUAD" "LUSC" "UVM"
## [19] "OV" "PAAD" "PRAD" "KIRC" "SARC" "STAD"
## [25] "KIRP" "KICH"
## Missed cancers in updated version
setdiff(orrDf_final$project,orrDf_final2$project)
## [1] "BRCA" "DLBC" "THYM" "MISC"
orrDf_final.missed <- orrDf_final %>% dplyr::filter(project %in% setdiff(orrDf_final$project,orrDf_final2$project)) %>% droplevels()
# Merge with updated one
orrDf <- rbind(orrDf_final2,orrDf_final.missed) %>% distinct() %>% droplevels()
# Change name for COAD MSI &MSS
# orrDf_final2$project[7:8] <- c("COAD_MSI", "COAD_MSS")
tcgaData.tp.sub.filt.orr <- merge(tcgaData.tp.sub.filt, orrDf %>% dplyr::select(ORR,project) %>% distinct() %>% droplevels(),
by="project", all=TRUE)
unique(tcgaData.tp.sub.filt.orr$project)
## [1] "ACC" "BLCA" "BRCA" "BRCA_TN" "CESC" "CHOL"
## [7] "COAD_MSI" "COAD_MSS" "DLBC" "ESCA" "GBM" "HNSC"
## [13] "KICH" "KIRC" "KIRP" "LGG" "LIHC" "LUAD"
## [19] "LUSC" "MESO" "MISC" "OV" "PAAD" "PCPG"
## [25] "PRAD" "READ" "SARC" "SKCM" "STAD" "TGCT"
## [31] "THCA" "THYM" "UCEC" "UCS" "UVM"
tcgaData.tp.sub.filt.orr <- merge(tcgaData.tp.sub.filt, orrDf %>% dplyr::select(ORR,project) %>% distinct() %>% droplevels(),
by="project", all.x=TRUE)
unique(tcgaData.tp.sub.filt.orr$project)
## [1] "ACC" "BLCA" "BRCA_TN" "CESC" "CHOL" "COAD_MSI"
## [7] "COAD_MSS" "ESCA" "GBM" "HNSC" "KICH" "KIRC"
## [13] "KIRP" "LGG" "LIHC" "LUAD" "LUSC" "MESO"
## [19] "OV" "PAAD" "PCPG" "PRAD" "READ" "SARC"
## [25] "SKCM" "STAD" "TGCT" "THCA" "UCS" "UVM"
length(unique(tcgaData.tp.sub.filt.orr$project))
## [1] 30
unique(orrDf$project)
## [1] "ACC" "BLCA" "BRCA_TN" "CESC" "CHOL" "COAD_MSI"
## [7] "COAD_MSS" "ESCA" "GBM" "HNSC" "KICH" "KIRC"
## [13] "KIRP" "LIHC" "LUAD" "LUSC" "MESO" "OV"
## [19] "PRAD" "PAAD" "SARC" "SKCM" "STAD" "TGCT"
## [25] "UCEC" "UVM" "BRCA" "DLBC" "THYM" "MISC"
immdata.TcrBcr.full.red <- loadRData(paste0(antiPDL1publicIntermediateData,"immdata.TcrBcr.full.red",modelTypeA,modelTypeB,modelTypeC,".RData"))
immdata.TcrBcr.full.red$response <- ifelse(immdata.TcrBcr.full.red$response=="CR+PR","CRPR",immdata.TcrBcr.full.red$response)
## FILTER DATA FOR PD, CR, PR (.sub)
immdata.TcrBcr.full.red.sub <- immdata.TcrBcr.full.red %>% dplyr::filter(response %in% c("PD","CRPR")) %>% droplevels()
## EXTRACT RESPONSE, NOMINAL, MEASURE VARIABLES FROM DATA
response.var <- "response"
nominal.vars <- c("dataset","tissue","gender")
measure.vars <-colnames(immdata.TcrBcr.full.red)[c(2:10,12,32:43)]
measure.vars <- measure.vars[c(6,22,1:3,20,21,4:5,10,7:9,11:19)]
## CHECK INFINITE LOG TMB VALUES
immdata.TcrBcr.full.red.sub[which(is.infinite(immdata.TcrBcr.full.red.sub$logTMB)),]
## [1] run_accession TuTACK_sigscore TIS_sigscore PDL1
## [5] CD8A CD8B TCRArichness.ds StromalScore
## [9] ImmuneScore ESTIMATEScore title age
## [13] gender disease_status biopsy_site primary_tumor
## [17] treatment prior_treatment response OS.time
## [21] OS PFS.time PFS pre_on_treatment
## [25] total_muts nonsyn_muts clonal_muts subclonal_muts
## [29] TMB tissue dataset Bcells
## [33] CD8cells CD4cells Tregs NKcells
## [37] DCcells Monocytes Granulocytes Macrophages
## [41] TumorPurity logTMB BCR.IGHrichness.ds
## <0 rows> (or 0-length row.names)
## Extract tissues in data so far
tissues <- unique(immdata.TcrBcr.full.red.sub$tissue)
# Which are the covariates I am considering
# covariatesIn <- measure.vars[c(1:2,4:7,14:16)]
covariatesIn <- measure.vars[c(1:2,4:7,14:16)]
# covariatesIn <- measure.vars[c(1:2,4:8,15:17)]
responseVar <- "response"
colnames(immdata.TcrBcr.full.red)[c(7,43,32,4)] <- c("TCR_Richness","BCR_Richness","B_cells","PD_L1")
immdata.TcrBcr.full.red$tissue <- ifelse(immdata.TcrBcr.full.red$tissue=="melanoma", "SKCM anti-PD-1/L1",
ifelse(immdata.TcrBcr.full.red$tissue=="RCC","KIRC anti-PD-1/L1",
ifelse(immdata.TcrBcr.full.red$tissue=="bladder","BLCA anti-PD-1/L1", ifelse(immdata.TcrBcr.full.red$tissue=="gastric","STAD anti-PD-1/L1",immdata.TcrBcr.full.red$tissue))))
immdata.TcrBcr.full.red$tissue <- factor(immdata.TcrBcr.full.red$tissue, levels=paste0(mycancertypes.final, " anti-PD-1/L1"))
parameters <- colnames(tcgaData.tp.sub.filt)[c(3:5,7:11)]
labels <- c("PD-L1 expression","logTMB","BCR Richness","B cells infiltration","CD8+ T-cells infilitration","CD4+ T-cells infilitration",
"T-cell inflamed GEP", "Tumor Purity")
tcgaData.tp.sub.filt.orr %>% dplyr::select(project,bcr_patient_barcode,ORR) %>% dplyr::group_by(project,ORR) %>% distinct() %>% dplyr::summarize(n())
## # A tibble: 30 x 3
## # Groups: project [30]
## project ORR `n()`
## <chr> <dbl> <int>
## 1 ACC 0.06 92
## 2 BLCA 0.181 411
## 3 BRCA_TN 0.0669 191
## 4 CESC 0.145 305
## 5 CHOL 0.0577 36
## 6 COAD_MSI 0.311 78
## 7 COAD_MSS 0 275
## 8 ESCA 0.172 185
## 9 GBM 0.0874 397
## 10 HNSC 0.146 528
## # ... with 20 more rows
## # i Use `print(n = ...)` to see more rows
tcga.cancers <- tcgaData.tp.sub.filt.orr %>% dplyr::pull(project) %>% unique()
orr_selected_cancers <- tcga.cancers[tcga.cancers %in% orrDf$project] # TOTAL 26 TCGA
Here I calculate:
I want to have a network diagram, preferably a circle one, with stable positions, so not using distances, or creating clusters of highly correlated features. This diagram ideally should present the strength/size of the correlation by thickness of connecting line, whether it is positive or negative, by color of line and ideally should only present significant correlations, meaning that for non-significant, there will be no line. Idea is to provide a table with the correlations and the significance as supplementary table, so people can see exact values. But the graph should support visually the patterns, especially the transition from global to multicancer. Then as supplementary we should have the same network graphs for each cancer type separately, again to see if patterns are similar. It’s way easier with the graphs
Calculate correlation matrices between features
## TCGA GLOBAL
tidy_cors.global <- tcgaData.tp.sub.filt %>% dplyr::filter(project %in% mycancertypes.reduced) %>%
dplyr::select(all_of(featuresIn)) %>%
setNames(featuresInLabels) %>%
correlate(use = "pairwise.complete.obs", # This is very important
method = "spearman", diagonal=1) #%>%
#stretch()
# Check significance of correlations
cormat.gb <- tidy_cors.global %>% column_to_rownames(var="term") %>% as.matrix()
# head(cormat)
upper_tri <- get_upper_tri(cormat.gb)
melted_cormat.gb <- reshape2::melt(cormat.gb, na.rm = TRUE)
# head(melted_cormat)
formCors.gb <- formatted_cors(tcgaData.tp.sub.filt %>% dplyr::filter(project %in% mycancertypes.reduced) %>%
dplyr::select(all_of(featuresIn)) %>% setNames(featuresInLabels), methodSel = NULL)
formCors.gb$measure2 <- gsub('\\.\\.',"+ ",formCors.gb$measure2 )
formCors.gb$measure2 <- gsub('\\.'," ",formCors.gb$measure2 )
formCors.gb$measure2 <- ifelse(formCors.gb$measure2=="PD L1 expression","PD-L1 expression",
ifelse(formCors.gb$measure2=="T cell inflamed GEP","T-cell inflamed GEP",
ifelse(formCors.gb$measure2=="CD8+ T cells infiltration","CD8+ T-cells infiltration",
ifelse(formCors.gb$measure2=="CD4+ T cells infiltration","CD4+ T-cells infiltration",formCors.gb$measure2))))
formCors.gb$measure1 <- factor(formCors.gb$measure1, levels = levels(melted_cormat.gb$Var1))
formCors.gb$measure2 <- factor(formCors.gb$measure2, levels = levels(melted_cormat.gb$Var2))
## TCGA MULTICOHORT (4)
tidy_cors.multi <- tcgaData.tp.sub.filt %>% dplyr::filter(project %in% mycancertypes.final) %>%
dplyr::select(all_of(featuresIn))%>%
setNames(featuresInLabels) %>%
correlate(use = "pairwise.complete.obs", # This is very important
method = "spearman") #%>%
#stretch()
cormat.ml <- tidy_cors.multi %>% column_to_rownames(var="term") %>% as.matrix()
# head(cormat)
upper_tri <- get_upper_tri(cormat.ml)
melted_cormat.ml <- reshape2::melt(cormat.ml, na.rm = TRUE)
# head(melted_cormat)
formCors.ml <- formatted_cors(tcgaData.tp.sub.filt %>% dplyr::filter(project %in% mycancertypes.final) %>%
dplyr::select(all_of(featuresIn))%>%
setNames(featuresInLabels) , methodSel = NULL)
formCors.ml$measure2 <- gsub('\\.\\.',"+ ",formCors.ml$measure2 )
formCors.ml$measure2 <- gsub('\\.'," ",formCors.ml$measure2 )
formCors.ml$measure2 <- ifelse(formCors.ml$measure2=="PD L1 expression","PD-L1 expression",
ifelse(formCors.ml$measure2=="T cell inflamed GEP","T-cell inflamed GEP",
ifelse(formCors.ml$measure2=="CD8+ T cells infiltration","CD8+ T-cells infiltration",
ifelse(formCors.ml$measure2=="CD4+ T cells infiltration","CD4+ T-cells infiltration",formCors.ml$measure2))))
formCors.ml$measure1 <- factor(formCors.ml$measure1, levels = levels(melted_cormat.ml$Var1))
formCors.ml$measure2 <- factor(formCors.ml$measure2, levels = levels(melted_cormat.ml$Var2))
### TCGA INDIVID MULTICANCER
tidy_cors.multi.sep <- sapply(mycancertypes.final, function(x) tcgaData.tp.sub.filt %>% dplyr::filter(project ==x) %>%
dplyr::select(all_of(featuresIn))%>%
setNames(featuresInLabels) %>%
correlate(use = "pairwise.complete.obs", # This is very important
method = "spearman"),simplify = FALSE)
cormat.ml.sep <- sapply(tidy_cors.multi.sep, function(x) x %>% column_to_rownames(var="term") %>% as.matrix(), simplify = FALSE)
# head(cormat)
upper_tri.ml.sep <- sapply(cormat.ml.sep, function(x) get_upper_tri(x), simplify = FALSE)
melted_cormat.ml.sep <- sapply(cormat.ml.sep, function(x) reshape2::melt(x, na.rm = TRUE), simplify = FALSE)
formCors.ml.sep <- sapply(mycancertypes.final, function(x) formatted_cors(tcgaData.tp.sub.filt %>% dplyr::filter(project ==x) %>%
dplyr::select(all_of(featuresIn))%>%
setNames(featuresInLabels) , methodSel = NULL),simplify = FALSE)
formCors.ml.sep$SKCM$measure2 <- gsub('\\.\\.',"+ ",formCors.ml.sep$SKCM$measure2)
formCors.ml.sep$SKCM$measure2 <- gsub('\\.'," ",formCors.ml.sep$SKCM$measure2 )
formCors.ml.sep$SKCM$measure2 <- ifelse(formCors.ml.sep$SKCM$measure2=="PD L1 expression","PD-L1 expression",
ifelse(formCors.ml.sep$SKCM$measure2=="T cell inflamed GEP","T-cell inflamed GEP",
ifelse(formCors.ml.sep$SKCM$measure2=="CD8+ T cells infiltration","CD8+ T-cells infiltration",
ifelse(formCors.ml.sep$SKCM$measure2=="CD4+ T cells infiltration","CD4+ T-cells infiltration",formCors.ml.sep$SKCM$measure2))))
formCors.ml.sep$SKCM$measure1 <- factor(formCors.ml.sep$SKCM$measure1, levels = levels(melted_cormat.ml.sep$SKCM$Var1))
formCors.ml.sep$SKCM$measure2 <- factor(formCors.ml.sep$SKCM$measure2, levels = levels(melted_cormat.ml.sep$SKCM$Var2))
## STAD
formCors.ml.sep$STAD$measure2 <- gsub('\\.\\.',"+ ",formCors.ml.sep$STAD$measure2)
formCors.ml.sep$STAD$measure2 <- gsub('\\.'," ",formCors.ml.sep$STAD$measure2 )
formCors.ml.sep$STAD$measure2 <- ifelse(formCors.ml.sep$STAD$measure2=="PD L1 expression","PD-L1 expression",
ifelse(formCors.ml.sep$STAD$measure2=="T cell inflamed GEP","T-cell inflamed GEP",
ifelse(formCors.ml.sep$STAD$measure2=="CD8+ T cells infiltration","CD8+ T-cells infiltration",
ifelse(formCors.ml.sep$STAD$measure2=="CD4+ T cells infiltration","CD4+ T-cells infiltration",formCors.ml.sep$STAD$measure2))))
formCors.ml.sep$STAD$measure1 <- factor(formCors.ml.sep$STAD$measure1, levels = levels(melted_cormat.ml.sep$STAD$Var1))
formCors.ml.sep$STAD$measure2 <- factor(formCors.ml.sep$STAD$measure2, levels = levels(melted_cormat.ml.sep$STAD$Var2))
##BLCA
formCors.ml.sep$BLCA$measure2 <- gsub('\\.\\.',"+ ",formCors.ml.sep$BLCA$measure2)
formCors.ml.sep$BLCA$measure2 <- gsub('\\.'," ",formCors.ml.sep$BLCA$measure2 )
formCors.ml.sep$BLCA$measure2 <- ifelse(formCors.ml.sep$BLCA$measure2=="PD L1 expression","PD-L1 expression",
ifelse(formCors.ml.sep$BLCA$measure2=="T cell inflamed GEP","T-cell inflamed GEP",
ifelse(formCors.ml.sep$BLCA$measure2=="CD8+ T cells infiltration","CD8+ T-cells infiltration",
ifelse(formCors.ml.sep$BLCA$measure2=="CD4+ T cells infiltration","CD4+ T-cells infiltration",formCors.ml.sep$BLCA$measure2))))
formCors.ml.sep$BLCA$measure1 <- factor(formCors.ml.sep$BLCA$measure1, levels = levels(melted_cormat.ml.sep$BLCA$Var1))
formCors.ml.sep$BLCA$measure2 <- factor(formCors.ml.sep$BLCA$measure2, levels = levels(melted_cormat.ml.sep$BLCA$Var2))
##KIRC
formCors.ml.sep$KIRC$measure2 <- gsub('\\.\\.',"+ ",formCors.ml.sep$KIRC$measure2)
formCors.ml.sep$KIRC$measure2 <- gsub('\\.'," ",formCors.ml.sep$KIRC$measure2 )
formCors.ml.sep$KIRC$measure2 <- ifelse(formCors.ml.sep$KIRC$measure2=="PD L1 expression","PD-L1 expression",
ifelse(formCors.ml.sep$KIRC$measure2=="T cell inflamed GEP","T-cell inflamed GEP",
ifelse(formCors.ml.sep$KIRC$measure2=="CD8+ T cells infiltration","CD8+ T-cells infiltration",
ifelse(formCors.ml.sep$KIRC$measure2=="CD4+ T cells infiltration","CD4+ T-cells infiltration",formCors.ml.sep$KIRC$measure2))))
formCors.ml.sep$KIRC$measure1 <- factor(formCors.ml.sep$KIRC$measure1, levels = levels(melted_cormat.ml.sep$KIRC$Var1))
formCors.ml.sep$KIRC$measure2 <- factor(formCors.ml.sep$KIRC$measure2, levels = levels(melted_cormat.ml.sep$KIRC$Var2))
global.net<-network_plot(tidy_cors.global,min_cor = .3, repel= TRUE, curved = TRUE,colours = c("navyblue", "white", "indianred2"),) + labs(color="Spearman's Correlation")
global.net$layers[[3]]$aes_params$family <- "Palatino Linotype"
global.net$layers[[3]]$aes_params$size <- 8
multi.net<-network_plot(tidy_cors.multi,min_cor = .3, repel= TRUE, curved = TRUE,colours = c("navyblue", "white", "indianred2"),) + labs(color="Spearman's Correlation")
multi.net$layers[[3]]$aes_params$family <- "Palatino Linotype"
multi.net$layers[[3]]$aes_params$size <- 8
full_netClusters <- ggarrange(plotlist = list(global.net, multi.net), common.legend = TRUE, legend="bottom",labels = c("A. Pancancer", "B. Multicancer"),font.label = list(size = 14, color = "black", face = "bold", family = "Palatino Linotype"))
full_netClusters
multi.net.sep<-sapply(tidy_cors.multi.sep, function(x) network_plot(x,min_cor = 0.3, repel= TRUE, curved = TRUE,colours = c("navyblue", "white", "indianred2"),) + labs(color="Spearman's Correlation"), simplify = FALSE)
multi.net.sep
## $SKCM
##
## $KIRC
##
## $BLCA
##
## $STAD
# Select only significant p-value correlations
formCors.ml.sign <- formCors.ml %>% dplyr::filter(sig_p==TRUE) %>% droplevels()
formCors.gb.sign <- formCors.gb %>% dplyr::filter(sig_p==TRUE) %>% droplevels()
cor.graph.ml <- as_tbl_graph(formCors.ml.sign, directed = FALSE) %>% activate(edges) %>%
dplyr::rename(weight = r)
# Select only significant p-value correlations
formCors.ml.sep.sign <- sapply(formCors.ml.sep, function(x) x %>% dplyr::filter(sig_p==TRUE) %>% droplevels(), simplify = FALSE)
cor.graph.ml.sep <- sapply(formCors.ml.sep.sign, function(x) as_tbl_graph(x, directed = FALSE) %>% activate(edges) %>%
dplyr::rename(weight = r), simplify = FALSE)
tcgaData.tp.sub.filt.orr %>% dplyr::select(project,bcr_patient_barcode,ORR) %>% dplyr::group_by(project,ORR) %>% distinct() %>% dplyr::summarize(n())
## # A tibble: 30 x 3
## # Groups: project [30]
## project ORR `n()`
## <chr> <dbl> <int>
## 1 ACC 0.06 92
## 2 BLCA 0.181 411
## 3 BRCA_TN 0.0669 191
## 4 CESC 0.145 305
## 5 CHOL 0.0577 36
## 6 COAD_MSI 0.311 78
## 7 COAD_MSS 0 275
## 8 ESCA 0.172 185
## 9 GBM 0.0874 397
## 10 HNSC 0.146 528
## # ... with 20 more rows
## # i Use `print(n = ...)` to see more rows
tcga.cancers <- tcgaData.tp.sub.filt.orr %>% pull(project) %>% unique()
orr_selected_cancers <- tcga.cancers[tcga.cancers %in% orrDf$project] # TOTAL 26 TCGA
orr_tcr <- orr_corrPlot_tcga(tcgaData.tp.sub.filt.orr ,"TCR_Richness", aggregateBy="project", median,"median", mergeByCol="project",orr_selected_cancers, orrDf, "TCR Richness", 20, 0.08,0.015, "pearson")
# orr_tcr
orr_bcr <- orr_corrPlot_tcga(tcgaData.tp.sub.filt.orr ,"BCR_Richness", aggregateBy="project", median,"median", mergeByCol="project",orr_selected_cancers, orrDf, "BCR Richness", 24, 0.08,0.015, "pearson")
leg <- get_legend(orr_bcr +theme(legend.position="bottom", legend.box = "horizontal"))
pOrr <-ggdraw() +
draw_plot(orr_tcr +rremove("legend"),x = 0, y = 0.06, width = .5, height = .93) +
draw_plot(orr_bcr +rremove("legend"),x = 0.5, y = 0.06, width = .5, height = .93) +
draw_plot(leg,x = 0.45, y = 0, width = .1, height = .05)+
draw_plot_label(label = c("A", "B"), size = 15,
x = c(0, 0.5), y = c(1, 1),family="Palatino Linotype")
pOrr
others <- c("logTMB","PD_L1", "TIS_sigscore","CD8cells", "CD4cells", "B_cells", "TumorPurity")
othersLabels <- featuresInLabels[3:9]
othersxPlus <-c(.7,.7, .3, .03, .03, 0.035,.07)
othersyPlus <-c(0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08)
othersyMinus <-c(0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015)
othersParam <- list(others, othersLabels, othersxPlus, othersyPlus, othersyMinus)
orr_others <- sapply(1:7, function(x) orr_corrPlot_tcga(tcgaData.tp.sub.filt.orr ,othersParam[[1]][x], aggregateBy="project", median,"median", mergeByCol="project",orr_selected_cancers, orrDf, othersParam[[2]][x],othersParam[[3]][x], othersParam[[4]][x], othersParam[[5]][x], "pearson"), simplify = FALSE)
pOrr.other <- ggarrange(plotlist = orr_others, legend = "bottom", common.legend = TRUE, ncol=3, nrow=3,labels = c(paste0(paste0(LETTERS[1:7],""))),font.label = list(size = 16, color = "black", face = "bold", family = "Palatino Linotype"))
pOrr.other
others <- featuresIn
othersLabels <- featuresInLabels
othersxPlus <-c(20,24,.7,.7, .3, .03, .03, 0.035,.07)
othersyPlus <-c(0.08,0.08,0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08)
othersyMinus <-c(0.015,0.015,0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015)
othersParam <- list(others, othersLabels, othersxPlus, othersyPlus, othersyMinus)
orr_others <- sapply(1:9, function(x) orr_corrPlot_tcga(tcgaData.tp.sub.filt.orr ,othersParam[[1]][x], aggregateBy="project", median,"median", mergeByCol="project",orr_selected_cancers, orrDf, othersParam[[2]][x],othersParam[[3]][x], othersParam[[4]][x], othersParam[[5]][x], "pearson"), simplify = FALSE)
# orr_others
pOrr.all <- ggarrange(plotlist = orr_others, legend = "bottom", common.legend = TRUE, ncol=3, nrow=3,labels = c(paste0(paste0(LETTERS[1:9],". "),othersLabels)),font.label = list(size = 12, color = "black", face = "bold", family = "Palatino Linotype"), align="v",
hjust=c(-0.34,-0.34,-0.5,-0.26,-0.24,-0.2,-0.2,-0.27,-0.345))
pOrr.all
pOrr.all2 <- ggarrange(plotlist = orr_others, legend = "bottom", common.legend = TRUE, ncol=3, nrow=3,labels = "AUTO",font.label = list(size = 12, color = "black", face = "bold", family = "Palatino Linotype"))
pOrr.all2
box_tcr_cd8 <- grid.draw(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","CD8cells","TCR_Richness",median,"median","CD8+ T cells infiltration","TCR Richness",-280,-245.0,rollSum=TRUE))
box_tcr_cd8
box_bcr_bcells <- TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","B_cells","BCR_Richness",median,"median","B cell infiltration","BCR Richness",-310.3,-265.0,rollSum=TRUE)
# box_bcr_bcells
box.tcr.bcr <- ggdraw() +
draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","CD8cells","TCR_Richness",median,"median","CD8+ T cells infiltration","TCR Richness",-910,-750.0,rollSum=TRUE),x = 0, y = 0.5, width = 1, height = 0.5) +
draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","B_cells","BCR_Richness",median,"median","B cell infiltration","BCR Richness",-820,-680.0,rollSum=TRUE),x = 0, y = 0.001, width = 1, height = 0.5) +
draw_plot_label(label = c("A", "B"), size = 15,
x = c(0, 0), y = c(1, 0.5),family="Palatino Linotype")
box.tcr.bcr <- ggdraw() +
draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","CD8cells","TCR_Richness",median,"median","CD8+ T cells infiltration","TCR Richness",-.04,-.00001,logY=FALSE, segm = c(250,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0.66, width = 1, height = 0.33) +
draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","TIS_sigscore","TCR_Richness",median,"median","T cell inflamed GEP","TCR Richness",-.04,-.00001,logY=FALSE, segm = c(250,500), ylims=c(0,1800), ticks = c(40,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0.33, width = 1, height = 0.33) +
draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","B_cells","BCR_Richness",median,"median","B cells infiltration","BCR Richness",-.04,-.00001,logY=FALSE, segm = c(320,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0, width = 1, height = 0.33) +
draw_plot_label(label = c("A", "B","C"), size = 15,
x = c(0, 0, 0), y = c(1, 0.66,0.33),family="Palatino Linotype")
box.tcr.bcr
p.bcr <-ggdraw() +
draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","CD8cells","BCR_Richness",median,"median","CD8+ T cells infiltration","BCR Richness",-.04,-.00001,logY=FALSE, segm = c(340,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0.5, width = 1, height = 0.49) +
draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","TIS_sigscore","BCR_Richness",median,"median","T cell inflamed GEP","BCR Richness",-.04,-.00001,logY=FALSE, segm = c(340,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0, width = 1, height = 0.49) +
draw_plot_label(label = c("A", "B"), size = 15,
x = c(0, 0), y = c(1, 0.5),family="Palatino Linotype")
p.bcr
p.bcr_h <-ggdraw() +
draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","CD8cells","BCR_Richness",median,"median","CD8+ T cells infiltration","BCR Richness",-.04,-.00001,logY=FALSE, segm = c(340,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),
x = 0, y = 0, width = 0.49, height = 1) +
draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","TIS_sigscore","BCR_Richness",median,"median","T cell inflamed GEP","BCR Richness",-.04,-.00001,logY=FALSE, segm = c(340,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),
x = 0.5, y = 0, width = 0.49, height = 1) +
draw_plot_label(label = c("A", "B"), size = 16,
x = c(0, 0.5), y = c(1, 1),family="Palatino Linotype")
p.bcr_h
Using complete data across BCR, TCR, CD8, GEP and Bcells:
p.bcr2 <- ggdraw() +
draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","CD8cells","BCR_Richness",median,"median","CD8+ T cells infiltration","BCR Richness",-.04,-.00001,logY=FALSE, segm = c(340,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0.5, width = 1, height = 0.49) +
draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","TIS_sigscore","BCR_Richness",median,"median","T cell inflamed GEP","BCR Richness",-.04,-.00001,logY=FALSE, segm = c(340,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE,filterBY = c("BCR_Richness","TCR_Richness","CD8cells","TIS_sigscore","B_cells")),x = 0, y = 0, width = 1, height = 0.49) +
draw_plot_label(label = c("A", "B"), size = 15,
x = c(0, 0), y = c(1, 0.5),family="Palatino Linotype")
p.bcr2
Correlation tables with significance results for:
# Individual correlations
formCors.sep <- sapply(mycancertypes.final, function(x) formatted_cors(tcgaData.tp.sub.filt %>% dplyr::filter(project ==x) %>%
dplyr::select(all_of(featuresIn)) %>% setNames(featuresInLabels), methodSel = NULL), simplify = FALSE)
formCors.sep <- ldply(formCors.sep)
formCors.sep$measure2 <- gsub('\\.\\.',"+ ",formCors.sep$measure2 )
formCors.sep$measure2 <- gsub('\\.'," ",formCors.sep$measure2 )
formCors.sep$measure2 <- ifelse(formCors.sep$measure2=="PD L1 expression","PD-L1 expression",
ifelse(formCors.sep$measure2=="T cell inflamed GEP","T-cell inflamed GEP",
ifelse(formCors.sep$measure2=="CD8+ T cells infiltration","CD8+ T-cells infiltration",
ifelse(formCors.sep$measure2=="CD4+ T cells infiltration","CD4+ T-cells infiltration",formCors.sep$measure2))))
# Spearman's rho rank correlation coefficients for all possible pairs of columns of a matrix
# Ranks are computed using efficient algorithms
formCors.gb %>% head()
## # A tibble: 6 x 8
## measure1 measure2 r n P sig_p p_if_sig r_if_sig
## <fct> <fct> <dbl> <dbl> <dbl> <lgl> <dbl> <dbl>
## 1 TCR Richness TCR Richness 1 3914 NA NA NA NA
## 2 TCR Richness BCR Richness 0.590 3914 0 TRUE 0 0.590
## 3 BCR Richness BCR Richness 1 3914 NA NA NA NA
## 4 TCR Richness logTMB 0.00485 3914 0.761 FALSE NA NA
## 5 BCR Richness logTMB 0.217 3914 0 TRUE 0 0.217
## 6 logTMB logTMB 1 3914 NA NA NA NA
formCors.ml %>% head()
## # A tibble: 6 x 8
## measure1 measure2 r n P sig_p p_if_sig r_if_sig
## <fct> <fct> <dbl> <dbl> <dbl> <lgl> <dbl> <dbl>
## 1 TCR Richness TCR Richness 1 835 NA NA NA NA
## 2 TCR Richness BCR Richness 0.668 835 0 TRUE 0 0.668
## 3 BCR Richness BCR Richness 1 835 NA NA NA NA
## 4 TCR Richness logTMB -0.157 835 0.00000542 TRUE 0.00000542 -0.157
## 5 BCR Richness logTMB 0.102 835 0.00328 TRUE 0.00328 0.102
## 6 logTMB logTMB 1 835 NA NA NA NA
formCors.sep %>% head()
## .id measure1 measure2 r n P sig_p p_if_sig
## 1 SKCM TCR Richness TCR Richness 1.0000000 56 NA NA NA
## 2 SKCM TCR Richness BCR Richness 0.6077879 56 6.773896e-07 TRUE 6.773896e-07
## 3 SKCM BCR Richness BCR Richness 1.0000000 56 NA NA NA
## 4 SKCM TCR Richness logTMB 0.3617825 56 6.148641e-03 TRUE 6.148641e-03
## 5 SKCM BCR Richness logTMB 0.2529044 56 6.002960e-02 FALSE NA
## 6 SKCM logTMB logTMB 1.0000000 56 NA NA NA
## r_if_sig
## 1 NA
## 2 0.6077879
## 3 NA
## 4 0.3617825
## 5 NA
## 6 NA
newColnames <- c("Biomarker 1", "Biomarker 2", "Spearman's rho correlation", "N (observations)", "P-value")
formCors.gb<- formCors.gb %>%
dplyr::select(all_of(c("measure1","measure2","r","n","P"))) %>%
setNames(newColnames)
formCors.ml<- formCors.ml %>%
dplyr::select(all_of(c("measure1","measure2","r","n","P"))) %>%
setNames(newColnames)
formCors.sep<- formCors.sep %>%
dplyr::select(all_of(c(".id","measure1","measure2","r","n","P"))) %>%
setNames(c("TCGA cohort",newColnames))
#Pairs with fewer than 2 non-missing values have the r values set to NA
# Uses midranks in case of ties, as described by Hollander and Wolfe. P-values are approximated by using the t or F distributions.
##################
headerStyle1 <- createStyle(
fontSize = 11, fontColour = "black",
fgFill = NULL, halign = "left",textDecoration = "bold"
)
TitleStyle1 <- createStyle(
fontSize = 14, fontColour = "black",
fgFill = NULL, halign = "left",textDecoration = NULL
)
TitleStyle2 <- createStyle(
fontSize = 12, fontColour = "black",
fgFill = NULL, halign = "left",textDecoration = "bold"
)
TitleStyle3 <- createStyle(
fontSize = 11, fontColour = "black",
fgFill = NULL, halign = "left",textDecoration = NULL
)
wb <- createWorkbook()
# PANCANCER
addWorksheet(wb, "Pan-cancer")
writeData(wb, "Pan-cancer", "Supplementary table 1. Biomarkers pairwise Spearman's correlations", startCol = 1, startRow = 1)
addStyle(wb = wb, sheet = 1, rows = 1, cols = 1, style = TitleStyle1)
# Second
writeData(wb, "Pan-cancer","PAN-CANCER", startCol = 1, startRow = 3)
addStyle(wb = wb, sheet = 1, rows = 3, cols = 1, style = TitleStyle2)
writeData(wb, "Pan-cancer","Assymptotic P-values are shown, apptoximated by using the t or F distributions", startCol = 1, startRow = 4)
addStyle(wb = wb, sheet = 1, rows = 4, cols = 1, style = TitleStyle3)
writeData(wb, "Pan-cancer","Pairs with fewer than 2 non-missing values have the Spearman's r values set to NA", startCol = 1, startRow = 5)
addStyle(wb = wb, sheet = 1, rows = 5, cols = 1, style = TitleStyle3)
# Write table with data
writeData(wb, "Pan-cancer", formCors.gb %>% as.data.frame(), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)
## MULTICANCER
addWorksheet(wb, "Multicancer")
writeData(wb, "Multicancer", "Supplementary table 1. Biomarkers pairwise Spearman's correlations", startCol = 1, startRow = 1)
addStyle(wb = wb, sheet = 2, rows = 1, cols = 1, style = TitleStyle1)
# Second
writeData(wb, "Multicancer","MULTI-CANCER", startCol = 1, startRow = 3)
addStyle(wb = wb, sheet = 2, rows = 3, cols = 1, style = TitleStyle2)
writeData(wb, "Multicancer","Assymptotic P-values are shown, apptoximated by using the t or F distributions", startCol = 1, startRow = 4)
addStyle(wb = wb, sheet = 2, rows = 4, cols = 1, style = TitleStyle3)
writeData(wb, "Multicancer","Pairs with fewer than 2 non-missing values have the Spearman's r values set to NA", startCol = 1, startRow = 5)
addStyle(wb = wb, sheet = 2, rows = 5, cols = 1, style = TitleStyle3)
# Write table with data
writeData(wb, "Multicancer", formCors.ml %>% as.data.frame(), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)
## INDIVIDUAL
addWorksheet(wb, "Individual cohorts")
writeData(wb, "Individual cohorts", "Supplementary table 1. Biomarkers pairwise Spearman's correlations", startCol = 1, startRow = 1)
addStyle(wb = wb, sheet = 3, rows = 1, cols = 1, style = TitleStyle1)
# Second
writeData(wb, "Individual cohorts","INDIVIDUAL TCGA COHORTS (multicancer)", startCol = 1, startRow = 3)
addStyle(wb = wb, sheet = 3, rows = 3, cols = 1, style = TitleStyle2)
writeData(wb, "Individual cohorts","Assymptotic P-values are shown, apptoximated by using the t or F distributions", startCol = 1, startRow = 4)
addStyle(wb = wb, sheet = 3, rows = 4, cols = 1, style = TitleStyle3)
writeData(wb, "Pan-cancer","Pairs with fewer than 2 non-missing values have the Spearman's r values set to NA", startCol = 1, startRow = 5)
addStyle(wb = wb, sheet = 3, rows = 5, cols = 1, style = TitleStyle3)
# Write table with data
writeData(wb, "Individual cohorts", formCors.sep %>% as.data.frame(), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)
saveWorkbook(wb,paste0(projectOutDataPath,"/","Supplemental File 1.xlsx"), overwrite = TRUE)
Extract GO BP annotation for specific processes and all their child processes
# Annotation of GO Identifiers to their Biological Process Offspring
lumphActiv <- c("GO:0002285",get("GO:0002285", GOBPOFFSPRING))
agPresent <- c("GO:0019882",get("GO:0019882", GOBPOFFSPRING))
igProd <- c("GO:0002377",get("GO:0002377", GOBPOFFSPRING))
ifng.a <- c("GO:0034341",get("GO:0034341", GOBPOFFSPRING))
ifng.b <- c("GO:0032609",get("GO:0032609", GOBPOFFSPRING))
AgSignaling <- c("GO:0050851", get("GO:0050851",GOBPOFFSPRING))#
bp_ImmuneRelated.focused <- list(#"T cell activation" = tCellActiv,
"Lymphocyte activation involved in immune response" = lumphActiv,
"Antigen processing and presentation" = agPresent,
"Antigen receptor mediated signaling" = AgSignaling,
"Immunoglobulin production" = igProd,
"Response to type II interferon" = ifng.a,
"Type II interferon production" = ifng.b#,
# "B cell immunity"=bcellImmunity,
# "T cell immunity"=tcellImmunity
)
bp_ImmuneRelated.focused <- sapply(bp_ImmuneRelated.focused, function(x) as.data.frame(x), simplify=FALSE)
bp_ImmuneRelated.focused.DF <- ldply(bp_ImmuneRelated.focused)
colnames(bp_ImmuneRelated.focused.DF) <- c("Parent Description","ID")
Load GO ORA results
newCol <- "BCR"
suffix <- paste0("_",newCol)
Only upregulated
## MELANOMA
skcm.res = get_geneLists(DEA_GOoutData,paste0("SKCM.Degs.dataset_lfc0",suffix), lfcThres=0)
## RCC
kirc.res = get_geneLists(DEA_GOoutData,paste0("KIRC.Degs.dataset_lfc0",suffix), lfcThres=0)
## BLADDER
blca.res = get_geneLists(DEA_GOoutData,paste0("BLCA.Degs.dataset_lfc0",suffix), lfcThres=0)
## GASTRIC
stad.res = get_geneLists(DEA_GOoutData,paste0("STAD.Degs.dataset_lfc0",suffix), lfcThres=0)
## All together
go.all <- sapply(list("SKCM"=skcm.res,"KIRC"=kirc.res,"BLCA" = blca.res, "STAD" = stad.res), function(x) x[[3]], simplify = FALSE )
go.all.DF <- ldply(go.all)
go.all.DF.up <- go.all.DF %>% dplyr::filter(group=="upregulated")
#
# skcm_genes <- loadRData(paste0(DEA_GOoutData,"/geneBackground","_SKCM",Rdata.suffix))
# blca_genes <- loadRData(paste0(DEA_GOoutData,"/geneBackground","_BLCA",Rdata.suffix))
# kirc_genes <- loadRData(paste0(DEA_GOoutData,"/geneBackground","_KIRC",Rdata.suffix))
# stad_genes <- loadRData(paste0(DEA_GOoutData,"/geneBackground","_STAD",Rdata.suffix))
#
#
# ## Separate histologies
# if(isTRUE(runClean)){
#
# bp <- sapply(go.all, function(x) enrichGO(x$gene, ont="BP", OrgDb = 'org.Hs.eg.db',keyType = "SYMBOL",pAdjustMethod = "BH", universe=skcm_genes,
# pvalueCutoff = 0.05,
# qvalueCutoff = 0.05,minGSSize=5), simplify = FALSE)
# save(bp,file=paste0(DEA_GOoutData,"bpGO_allTCGA_res",suffix,Rdata.suffix))
# }else{
# bp_bcr_0<- loadRData(paste0(DEA_GOoutData,"bpGO_allTCGA_res",suffix,Rdata.suffix))
# }
# Load GO enrichment results
bp_bcr_0<- loadRData(paste0(DEA_GOoutData,"bpGO_allTCGA_res",suffix,Rdata.suffix))
## Drop GO terms not included in our focused GO terms set,
bp_bcr_0.filt <- sapply(bp_bcr_0, function(x) dropGO(x, level = NULL, term = setdiff(x %>% as.data.frame() %>% pull(ID),bp_ImmuneRelated.focused.DF$ID)), simplify = FALSE)
# Then Similarity analysis on reduced terms
if(isTRUE(runFiltSim)){
bp_bcr_0.filt.red <- sapply(bp_bcr_0.filt, function(x) simplify(x, cutoff=0.45, by="p.adjust", select_fun=min, measure="Wang"), simplify = FALSE)
save(bp_bcr_0.filt.red,file=paste0(DEA_GOoutData,"bpGO_allTCGA_res_filt_reduced",suffix,Rdata.suffix))
## REMEMBER: The higher the threshold, the more the redundant terms are removed
}else{
bp_bcr_0.filt.red <- loadRData(paste0(DEA_GOoutData,"bpGO_allTCGA_res_filt_reduced",suffix,Rdata.suffix))
}
bp_bcr_0.filt.red.DFs <- sapply(bp_bcr_0.filt.red, function(x) x %>% as.data.frame() %>%
dplyr::select(-geneID) %>%
dplyr::mutate(category=bp_ImmuneRelated.focused.DF$`Parent Description`[match(ID,bp_ImmuneRelated.focused.DF$ID)]) %>%
dplyr::mutate("GeneRatio"=parse_ratio(GeneRatio)), simplify=FALSE)
bp_bcr_0.filt.red.merged <- ldply(bp_bcr_0.filt.red.DFs)
bubble_bcr.cancers.filt.red <- sapply(bp_bcr_0.filt.red.DFs , function(x) GO_bubblePlot(DF=x,
sortVar="GeneRatio",xVar="GeneRatio",yVar="Description",sizeVar="Count",fillVar="p.adjust", aggreg=FALSE,choosePlot="norm",sizeMin=2,sizeMax=182, xMax=ceiling_dec(max(bp_bcr_0.filt.red.merged$GeneRatio),2), seqN=10, fillMax=max(bp_bcr_0.filt.red.merged$p.adjust)), simplify = FALSE)
pBub_bcr_0.filt.red <-ggarrange(plotlist = bubble_bcr.cancers.filt.red,
nrow=1, ncol=4, labels = c("SKCM", "KIRC", "BLCA", "STAD"),font.label = list(size = 14, color = "black", face = "bold", family = "Palatino Linotype"), common.legend = T, legend = "bottom")
pBub_bcr_0.filt.red
go_description <-get_names(bp_ImmuneRelated.focused.DF$ID)
bps_filtered_df <- bp_ImmuneRelated.focused.DF
bps_filtered_df$`BP Offspring Term description` <- go_description$go_name
colnames(bps_filtered_df)[1]<- "BP Term description"
#################
##################
headerStyle1 <- createStyle(
fontSize = 11, fontColour = "black",
fgFill = NULL, halign = "left",textDecoration = "bold"
)
TitleStyle1 <- createStyle(
fontSize = 14, fontColour = "black",
fgFill = NULL, halign = "left",textDecoration = NULL
)
TitleStyle2 <- createStyle(
fontSize = 12, fontColour = "black",
fgFill = NULL, halign = "left",textDecoration = "bold"
)
TitleStyle3 <- createStyle(
fontSize = 11, fontColour = "black",
fgFill = NULL, halign = "left",textDecoration = NULL
)
var <- c("SKCM", "KIRC", "BLCA", "STAD")
wb <- createWorkbook()
addWorksheet(wb, "Focused BPs")
writeData(wb, "Focused BPs", "Supplementary table 3. Gene ontology biological processes (BP) focused set, including annotations of all GO annotation offsprings", startCol = 1, startRow = 1)
addStyle(wb = wb, sheet = 1, rows = 1, cols = 1, style = TitleStyle1)
# Second
writeData(wb, "Focused BPs","GO Biological processes:", startCol = 1, startRow = 3)
addStyle(wb = wb, sheet = 1, rows = 3, cols = 1, style = TitleStyle2)
writeData(wb, "Focused BPs",paste(paste0(seq(1:6),". ",unique(bps_filtered_df$`BP Term description`)), collapse=", "), startCol = 1, startRow = 4)
addStyle(wb = wb, sheet = 1, rows = 4, cols = 1, style = TitleStyle3)
# Write table woth data
writeData(wb, "Focused BPs", bps_filtered_df, startCol = 1, startRow = 6, rowNames = FALSE,headerStyle = headerStyle1)
condition="BCR-"
for(i in seq(1:4)){
# i=1
# Create sheet
addWorksheet(wb, paste0(condition,var[i]))
# Start writing title
# First
writeData(wb, sheet=paste0(condition,var[i]), "Supplementary table 3. Gene ontology biological processes associated with the differentially upregulated genes in BCR Richness high samples compared to BCR Richness low samples", startCol = 1, startRow = 1)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 1, cols = 1, style = TitleStyle1)
# Second
writeData(wb, sheet=paste0(condition,var[i]), paste0(var[i]," - GO BIOLOGICAL PROCESSES WITH A SIGNIFICANT ASSOCIATION TO THE UPREGULATED GENES (FDR<0.05) "), startCol = 1, startRow = 3)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 3, cols = 1, style = TitleStyle2)
# Third
writeData(wb, sheet=paste0(condition,var[i]), "Adjusted p-value cutoff of enrichment test set to 0.05, method of adjustment used Benjamini-Hochberg", startCol = 1, startRow = 4)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 4, cols = 1, style = TitleStyle3)
# Fourth
writeData(wb, sheet=paste0(condition,var[i]), "Q-value cutoff of enrichment tests to report as significant set to 0.05. ", startCol = 1, startRow = 5)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 5, cols = 1, style = TitleStyle3)
# Write table woth data
writeData(wb, paste0(condition,var[i]), bp_bcr_0[[i]], startCol = 1, startRow = 7, rowNames = TRUE,headerStyle = headerStyle1)
}
saveWorkbook(wb,paste0(projectOutDataPath,"/","Supplemental File 3.xlsx"), overwrite = TRUE)
columnsDEGs <- c("Symbol","Ensembl_ID","Entrez_ID", "baseMean", "FC","lfcSE", "pvalue" , "padj")
selectCols <- c("gene","ensembl","entrez","baseMean", "FC","lfcSE", "pvalue","padj")
########################################
var <- c("SKCM", "KIRC", "BLCA", "STAD")
degs <- list(skcm.res, kirc.res, blca.res, stad.res)
wb <- createWorkbook()
condition="BCR-"
for(i in seq(1:4)){
# i=1
# Create sheet
addWorksheet(wb, paste0(condition,var[i]))
# Start writing title
# First
writeData(wb, sheet=paste0(condition,var[i]), "Supplementary table 2. Differentially expressed genes in BCR Richness high samples compared to BCR Richness low samples", startCol = 1, startRow = 1)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 1, cols = 1, style = TitleStyle1)
# Second
writeData(wb, sheet=paste0(condition,var[i]), paste0(var[i]," - DIFFERENTIALLY EXPRESSED GENES (FDR<0.05) "), startCol = 1, startRow = 3)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 3, cols = 1, style = TitleStyle2)
# Third
writeData(wb, sheet=paste0(condition,var[i]), "Adjusted p-value cutoff (FDR) set to 0.05, method of adjustment used Benjamini-Hochberg", startCol = 1, startRow = 4)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 4, cols = 1, style = TitleStyle3)
# Fourth
writeData(wb, sheet=paste0(condition,var[i]), "apeglm estimator used for the log2 fold change (LFC) shrinkage and DESeq2 R package used for the differential expression analysis", startCol = 1, startRow = 5)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 5, cols = 1, style = TitleStyle3)
# Write table woth data
writeData(wb, paste0(condition,var[i]), degs[[i]]$gL.df %>%
dplyr::select(all_of(selectCols)) %>%
setNames(columnsDEGs), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)
}
saveWorkbook(wb,paste0(projectOutDataPath,"/","Supplemental File 2.xlsx"), overwrite = TRUE)
Load GO ORA results
newCol <- "PD1L1"
suffix <- paste0("_",newCol)
Only upregulated
#Loading enrichment results
bp_pdl1<- loadRData(paste0(DEA_GOoutData,"bpGO_allTCGA_res",suffix,Rdata.suffix))
## Drop GO terms not included in our focused GO terms set,
bp_pdl1.filt <- sapply(bp_pdl1, function(x) dropGO(x, level = NULL, term = setdiff(x %>% as.data.frame() %>% pull(ID),bp_ImmuneRelated.focused.DF$ID)), simplify = FALSE)
if(isTRUE(runFiltSim)){
# Then Similarity analysis on reduced terms
bp_pdl1.filt.red <- sapply(bp_pdl1.filt, function(x) simplify(x, cutoff=0.45, by="p.adjust", select_fun=min, measure="Wang"), simplify = FALSE)
## REMEMBER: The higher the threshold, the more the redundant terms are removed
save(bp_pdl1.filt.red,file=paste0(DEA_GOoutData,"bpGO_allTCGA_res_filt_reduced",suffix,Rdata.suffix))
}else{
bp_pdl1.filt.red <- loadRData(paste0(DEA_GOoutData,"bpGO_allTCGA_res_filt_reduced",suffix,Rdata.suffix))
}
bp_pdl1.filt.red.DFs <- sapply(bp_pdl1.filt.red, function(x) x %>% as.data.frame() %>%
dplyr::select(-geneID) %>%
dplyr::mutate(category=bp_ImmuneRelated.focused.DF$`Parent Description`[match(ID,bp_ImmuneRelated.focused.DF$ID)]) %>%
dplyr::mutate("GeneRatio"=parse_ratio(GeneRatio)), simplify=FALSE)
bp_pdl1.filt.red.merged <- ldply(bp_pdl1.filt.red.DFs)
max(bp_pdl1.filt.red.DFs$SKCM$GeneRatio,bp_pdl1.filt.red.DFs$KIRC$GeneRatio,bp_pdl1.filt.red.DFs$BLCA$GeneRatio,bp_pdl1.filt.red.DFs$STAD$GeneRatio)
## [1] 0.06276661
min(bp_pdl1.filt.red.DFs$SKCM$GeneRatio,bp_pdl1.filt.red.DFs$KIRC$GeneRatio,bp_pdl1.filt.red.DFs$BLCA$GeneRatio,bp_pdl1.filt.red.DFs$STAD$GeneRatio)
## [1] 0.001599744
bubble_pdl1.cancers.filt.red <- sapply(bp_pdl1.filt.red.DFs , function(x) GO_bubblePlot(DF=x,
sortVar="GeneRatio",xVar="GeneRatio",yVar="Description",sizeVar="Count",fillVar="p.adjust", aggreg=FALSE,choosePlot="norm",sizeMin=2,sizeMax=182, xMax=ceiling_dec(max(bp_pdl1.filt.red.merged$GeneRatio),2), seqN=10, fillMax=max(bp_pdl1.filt.red.merged$p.adjust)), simplify = FALSE)
pBub_pdl1.filt.red <-ggarrange(plotlist = bubble_pdl1.cancers.filt.red ,
nrow=1, ncol=4, labels = c("SKCM", "KIRC", "BLCA", "STAD"),font.label = list(size = 14, color = "black", face = "bold", family = "Palatino Linotype"), common.legend = T, legend = "bottom")
pBub_pdl1.filt.red
wb <- loadWorkbook(paste0(projectOutDataPath,"/","Supplemental File 3.xlsx"))
condition="PDL1-"
for(i in seq(1:4)){
# i=1
# Create sheet
addWorksheet(wb, paste0(condition,var[i]))
# Start writing title
# First
writeData(wb, sheet=paste0(condition,var[i]), "Supplementary table 3. Gene ontology biological processes associated with the differentially upregulated genes in PD-L1 expression high samples compared to PD-L1 expression low samples", startCol = 1, startRow = 1)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 1, cols = 1, style = TitleStyle1)
# Second
writeData(wb, sheet=paste0(condition,var[i]), paste0(var[i]," - GO BIOLOGICAL PROCESSES WITH A SIGNIFICANT ASSOCIATION TO THE UPREGULATED GENES (FDR<0.05) "), startCol = 1, startRow = 3)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 3, cols = 1, style = TitleStyle2)
# Third
writeData(wb, sheet=paste0(condition,var[i]), "Adjusted p-value cutoff of enrichment test set to 0.05, method of adjustment used Benjamini-Hochberg", startCol = 1, startRow = 4)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 4, cols = 1, style = TitleStyle3)
# Fourth
writeData(wb, sheet=paste0(condition,var[i]), "Q-value cutoff of enrichment tests to report as significant set to 0.05. ", startCol = 1, startRow = 5)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 5, cols = 1, style = TitleStyle3)
# Write table woth data
writeData(wb, paste0(condition,var[i]), bp_pdl1[[i]] %>% as.data.frame(), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)
}
saveWorkbook(wb,paste0(projectOutDataPath,"/","Supplemental File 3.xlsx"), overwrite = TRUE)
columnsDEGs <- c("Symbol","Ensembl_ID","Entrez_ID", "baseMean", "FC","lfcSE", "pvalue" , "padj")
selectCols <- c("gene","ensembl","entrez","baseMean", "FC","lfcSE", "pvalue","padj")
wb <- loadWorkbook(paste0(projectOutDataPath,"/","Supplemental File 2.xlsx"))
degs <- list(skcm.res, kirc.res, blca.res, stad.res)
condition="PDL1-"
for(i in seq(1:4)){
# i=1
# Create sheet
addWorksheet(wb, paste0(condition,var[i]))
# Start writing title
# First
writeData(wb, sheet=paste0(condition,var[i]), "Supplementary table 2. Differentially expressed genes in PD-L1 expression high samples compared to PD-L1 expression low samples", startCol = 1, startRow = 1)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 1, cols = 1, style = TitleStyle1)
# Second
writeData(wb, sheet=paste0(condition,var[i]), paste0(var[i]," - DIFFERENTIALLY EXPRESSED GENES (FDR<0.05) "), startCol = 1, startRow = 3)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 3, cols = 1, style = TitleStyle2)
# Third
writeData(wb, sheet=paste0(condition,var[i]), "Adjusted p-value cutoff (FDR) set to 0.05, method of adjustment used Benjamini-Hochberg", startCol = 1, startRow = 4)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 4, cols = 1, style = TitleStyle3)
# Fourth
writeData(wb, sheet=paste0(condition,var[i]), "apeglm estimator used for the log2 fold change (LFC) shrinkage and DESeq2 R package used for the differential expression analysis", startCol = 1, startRow = 5)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 5, cols = 1, style = TitleStyle3)
# Write table woth data
writeData(wb, paste0(condition,var[i]), degs[[i]]$gL.df %>%
dplyr::select(all_of(selectCols)) %>%
setNames(columnsDEGs), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)
}
saveWorkbook(wb,paste0(projectOutDataPath,"/","Supplemental File 2.xlsx"), overwrite = TRUE)
Load GO ORA results
newCol <- "CD8"
suffix <- paste0("_",newCol)
Only upregulated
# Loading enrichment results
bp_cd8<- loadRData(paste0(DEA_GOoutData,"bpGO_allTCGA_res",suffix,Rdata.suffix))
## Drop GO terms not included in our focused GO terms set,
bp_cd8.filt <- sapply(bp_cd8, function(x) dropGO(x, level = NULL, term = setdiff(x %>% as.data.frame() %>% pull(ID),bp_ImmuneRelated.focused.DF$ID)), simplify = FALSE)
if(isTRUE(runFiltSim)){
# Then Similarity analysis on reduced terms
bp_cd8.filt.red <- sapply(bp_cd8.filt, function(x) simplify(x, cutoff=0.45, by="p.adjust", select_fun=min, measure="Wang"), simplify = FALSE)
## REMEMBER: The higher the threshold, the more the redundant terms are removed
save(bp_cd8.filt.red,file=paste0(DEA_GOoutData,"bpGO_allTCGA_res_filt_reduced",suffix,Rdata.suffix))
}else{
bp_cd8.filt.red <- loadRData(paste0(DEA_GOoutData,"bpGO_allTCGA_res_filt_reduced",suffix,Rdata.suffix))
}
bp_cd8.filt.red.DFs <- sapply(bp_cd8.filt.red, function(x) x %>% as.data.frame() %>%
dplyr::select(-geneID) %>%
dplyr::mutate(category=bp_ImmuneRelated.focused.DF$`Parent Description`[match(ID,bp_ImmuneRelated.focused.DF$ID)]) %>%
dplyr::mutate("GeneRatio"=parse_ratio(GeneRatio)), simplify=FALSE)
bp_cd8.filt.red.merged <- ldply(bp_cd8.filt.red.DFs)
bubble_cd8.cancers.filt.red <- sapply(bp_cd8.filt.red.DFs , function(x) GO_bubblePlot(DF=x,
sortVar="GeneRatio",xVar="GeneRatio",yVar="Description",sizeVar="Count",fillVar="p.adjust", aggreg=FALSE,choosePlot="norm",sizeMin=2,sizeMax=182, xMax=ceiling_dec(max(bp_cd8.filt.red.merged$GeneRatio),2), seqN=10, fillMax=max(bp_cd8.filt.red.merged$p.adjust)), simplify = FALSE)
pBub_cd8.filt.red <-ggarrange(plotlist = bubble_cd8.cancers.filt.red ,
nrow=1, ncol=4, labels = c("SKCM", "KIRC", "BLCA", "STAD"),font.label = list(size = 14, color = "black", face = "bold", family = "Palatino Linotype"), common.legend = T, legend = "bottom")
pBub_cd8.filt.red
wb <- loadWorkbook(paste0(projectOutDataPath,"/","Supplemental File 3.xlsx"))
condition="CD8-"
for(i in seq(1:4)){
# i=1
# Create sheet
addWorksheet(wb, paste0(condition,var[i]))
# Start writing title
# First
writeData(wb, sheet=paste0(condition,var[i]), "Supplementary table 3. Gene ontology biological processes associated with the differentially upregulated genes in CD8+ T-cells infiltration high samples compared to CD8+ T-cells infiltration low samples", startCol = 1, startRow = 1)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 1, cols = 1, style = TitleStyle1)
# Second
writeData(wb, sheet=paste0(condition,var[i]), paste0(var[i]," - GO BIOLOGICAL PROCESSES WITH A SIGNIFICANT ASSOCIATION TO THE UPREGULATED GENES (FDR<0.05) "), startCol = 1, startRow = 3)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 3, cols = 1, style = TitleStyle2)
# Third
writeData(wb, sheet=paste0(condition,var[i]), "Adjusted p-value cutoff of enrichment test set to 0.05, method of adjustment used Benjamini-Hochberg", startCol = 1, startRow = 4)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 4, cols = 1, style = TitleStyle3)
# Fourth
writeData(wb, sheet=paste0(condition,var[i]), "Q-value cutoff of enrichment tests to report as significant set to 0.05. ", startCol = 1, startRow = 5)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 5, cols = 1, style = TitleStyle3)
# Write table woth data
writeData(wb, paste0(condition,var[i]), bp_cd8[[i]] %>% as.data.frame(), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)
}
saveWorkbook(wb,paste0(projectOutDataPath,"/","Supplemental File 3.xlsx"), overwrite = TRUE)
columnsDEGs <- c("Symbol","Ensembl_ID","Entrez_ID", "baseMean", "FC","lfcSE", "pvalue" , "padj")
selectCols <- c("gene","ensembl","entrez","baseMean", "FC","lfcSE", "pvalue","padj")
wb <- loadWorkbook(paste0(projectOutDataPath,"/","Supplemental File 2.xlsx"))
degs <- list(skcm.res, kirc.res, blca.res, stad.res)
condition="CD8-"
for(i in seq(1:4)){
# i=1
# Create sheet
addWorksheet(wb, paste0(condition,var[i]))
# Start writing title
# First
writeData(wb, sheet=paste0(condition,var[i]), "Supplementary table 2. Differentially expressed genes in CD8+ T-cells infiltration high samples compared to CD8+ T-cells infiltration samples", startCol = 1, startRow = 1)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 1, cols = 1, style = TitleStyle1)
# Second
writeData(wb, sheet=paste0(condition,var[i]), paste0(var[i]," - DIFFERENTIALLY EXPRESSED GENES (FDR<0.05) "), startCol = 1, startRow = 3)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 3, cols = 1, style = TitleStyle2)
# Third
writeData(wb, sheet=paste0(condition,var[i]), "Adjusted p-value cutoff (FDR) set to 0.05, method of adjustment used Benjamini-Hochberg", startCol = 1, startRow = 4)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 4, cols = 1, style = TitleStyle3)
# Fourth
writeData(wb, sheet=paste0(condition,var[i]), "apeglm estimator used for the log2 fold change (LFC) shrinkage and DESeq2 R package used for the differential expression analysis", startCol = 1, startRow = 5)
addStyle(wb = wb, sheet=paste0(condition,var[i]), rows = 5, cols = 1, style = TitleStyle3)
# Write table woth data
writeData(wb, paste0(condition,var[i]), degs[[i]]$gL.df %>%
dplyr::select(all_of(selectCols)) %>%
setNames(columnsDEGs), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)
}
saveWorkbook(wb,paste0(projectOutDataPath,"/","Supplemental File 2.xlsx"), overwrite = TRUE)
Figure 1 was produced using Visio.
global.net$layers[[3]]$aes_params$family <- "Palatino Linotype"
global.net$layers[[3]]$aes_params$size <- 5
multi.net$layers[[3]]$aes_params$family <- "Palatino Linotype"
multi.net$layers[[3]]$aes_params$size <- 5
full_netClusters_v <- ggarrange(plotlist = list(global.net, multi.net), nrow=2,common.legend = TRUE, legend="bottom",labels = c("A. Pancancer", "B. Multicancer"),font.label = list(size = 15, color = "black", face = "bold", family = "Palatino Linotype"))
box.tcr.bcr <- ggdraw() +
draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","CD8cells","TCR_Richness",median,"median","CD8+ T cells infiltration","TCR Richness",-.04,-.00001,logY=FALSE, segm = c(250,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0.66, width = 1, height = 0.34) +
draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","TIS_sigscore","TCR_Richness",median,"median","T cell inflamed GEP","TCR Richness",-.04,-.00001,logY=FALSE, segm = c(250,500), ylims=c(0,1800), ticks = c(40,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0.33, width = 1, height = 0.34) +
draw_plot(TwosigScores_boxplot_tcga(tcgaData.tp.sub.filt,mycancertypes.reduced,"project","B_cells","BCR_Richness",median,"median","B cells infiltration","BCR Richness",-.04,-.00001,logY=FALSE, segm = c(320,500), ylims=c(0,1800), ticks = c(50,300),relHeights = c(1,0,.4),rollSum=TRUE),x = 0, y = 0, width = 1, height = 0.34) +
draw_plot_label(label = c("C.", "D.","E."), size = 15,
x = c(0, 0, 0), y = c(1, 0.66,0.33),family="Palatino Linotype")
fig2.1 <-ggarrange(plotlist = list(full_netClusters_v, box.tcr.bcr), ncol=2,common.legend = FALSE,
widths = c(1, 1.5), heights = c(1,2), align="h")#labels = c("", ""),font.label = list(size = 12, color =
fig2.1
cairo_pdf(filename = paste0(figuresPath,"Figure 2",".pdf"),width = 18, height=20)
print(fig2.1)
dev.off()
## png
## 2
#
# ggsave(fig2.1, filename = paste0(figuresPath,"Fig2.v1",".tiff"), dpi = 600,width = 18, height=19.5,
# units = "in")
Produced using excel.
Produced in the missingness analysis script.
multi.net.sep$SKCM$layers[[3]]$aes_params$family <- "Palatino Linotype"
multi.net.sep$SKCM$layers[[3]]$aes_params$size <- 5
multi.net.sep$KIRC$layers[[3]]$aes_params$family <- "Palatino Linotype"
multi.net.sep$KIRC$layers[[3]]$aes_params$size <- 5
multi.net.sep$STAD$layers[[3]]$aes_params$family <- "Palatino Linotype"
multi.net.sep$STAD$layers[[3]]$aes_params$size <- 5
multi.net.sep$BLCA$layers[[3]]$aes_params$family <- "Palatino Linotype"
multi.net.sep$BLCA$layers[[3]]$aes_params$size <- 5
sep_netClusters <- ggarrange(plotlist = multi.net.sep, common.legend = TRUE, legend="bottom",labels = c("A. SKCM", "B. KIRC", "C. BLCA", "D. STAD"),font.label = list(size = 15, color = "black", face = "bold", family = "Palatino Linotype"))
supp3 <-sep_netClusters
supp3
cairo_pdf(filename = paste0(figuresPath, "Supplemental Figure S3",".pdf"),width = 20, height=14)
print(supp3)
dev.off()
## png
## 2
#
# ggsave(supp3, filename = paste0(figuresPath,"Supplemental Figure S3",".tiff"), dpi = 600,width = 20, height=14,
# units = "in")
GO terms filtered and reduced based on similarity analysis
fig3 <-ggdraw() +
draw_plot(p.bcr_h,x = 0, y = 0.6, width = 1, height = .4) +
draw_plot(pBub_bcr_0.filt.red,x = 0.01, y = 0, width = .97, height = .57) +
draw_plot_label(label = c("C"), size = 16,
x = c(0), y = c(.58),family = "Palatino Linotype")
fig3
cairo_pdf(filename = paste0(figuresPath,"Figure 3",".pdf"),width = 25, height=17)
print(fig3)
dev.off()
## png
## 2
#
# ggsave(fig3, filename = paste0(figuresPath,"Figure 3",".tiff"), dpi = 600,width = 25, height=17,
# units = "in")
Minimum number of events per predictor parameter, EPP: 7
## HERE we define for which parameters we want to do Univariable analysis, and which to include in the multivariate survival analysis.
features <- featuresIn
## Define which columns include survival data
OS_time <- c("OS.time")
OS_status <- c("OS")
# PFS_time <- c("PFS.time")
# PFS_status <- c("PFS")
## DEFINE WHICH COLUMNS ARE CATEGORICAL
categ_feats <-featuresIn
## DEFINE WHICH COLUMNS ARE NUMERICAL
# we also add the status and time columns
numeric_feats <-featuresIn
finallabels <- featuresInLabels
names(finallabels) <- featuresIn
## SAVE DATA
### Univariable
## CAT
# uni.cat.meta.tcga<-loadRData(paste0(projectRDataPath,"uni.cat.meta.TCGA",Rdata.suffix))
uni.cat.meta4.tcga<-loadRData(paste0(survivalOutData,"uni.cat.meta4.TCGA",Rdata.suffix))
uni.cat.4.tcga <- loadRData(paste0(survivalOutData,"uni.cat.4.TCGA",Rdata.suffix))
### MULTIVARIATE
## CAT
# multi.cat.meta.tcga <- loadRData(paste0(projectRDataPath,"multi.cat.meta.TCGA",Rdata.suffix))
multi.cat.meta4.tcga <- loadRData(paste0(survivalOutData,"multi.cat.meta4.TCGA",Rdata.suffix))
multi.cat.4.tcga<- loadRData(paste0(survivalOutData,"multi.cat.4.TCGA",Rdata.suffix))
# Get lists with HRs merge them
uni.cat.4.tcga.hrlist <- sapply(uni.cat.4.tcga, function(x) x$res.df, simplify = FALSE)
uni.cat.4.tcga.hrDF <- ldply(uni.cat.4.tcga.hrlist,id="project")
colnames(uni.cat.4.tcga.hrDF)[1] <- "project"
multi.cat.4.tcga.hrlist <- sapply(multi.cat.4.tcga, function(x) x$res.df, simplify = FALSE)
multi.cat.4.tcga.hrDF <- ldply(multi.cat.4.tcga.hrlist,id="project")
colnames(multi.cat.4.tcga.hrDF)[1] <- "project"
uni.cat.4.tcga.hrDF$analysis <- rep("uni",nrow(uni.cat.4.tcga.hrDF))
multi.cat.4.tcga.hrDF$analysis <- rep("multi",nrow(multi.cat.4.tcga.hrDF))
# Merge
categorical.4.tcga <- rbind(uni.cat.4.tcga.hrDF,multi.cat.4.tcga.hrDF)
categorical.4.tcga$analysis <- factor(categorical.4.tcga$analysis,levels=c("uni","multi"))
analysisLabels <- c("Univariable","multivariate")
# significant
categorical.4.tcga$sign <- ifelse(categorical.4.tcga$p <= 0.05,"sign","notsign")
categorical.4.tcga$sign <- factor(categorical.4.tcga$sign, levels = c("sign","notsign"))
fillLabels <- c("significant","non-significant","NA value")
categorical.4.tcga$factor.name <- ifelse(categorical.4.tcga$factor.name=="CD8+ T cells infiltration",
"CD8+ T-cells infiltration",
ifelse(categorical.4.tcga$factor.name=="CD4+ T cells infiltration","CD4+ T-cells infiltration",categorical.4.tcga$factor.name))
categorical.4.tcga$factor.name <- factor(categorical.4.tcga$factor.name, levels = featuresInLabels)
categorical.4.tcga$project <- factor(categorical.4.tcga$project, levels = mycancertypes.final)
## SAVE DATA
### Univariable
## CAT
uni.cat.meta.aPD1 <-loadRData(paste0(survivalOutData,"uni.cat.meta.aPD1",Rdata.suffix))
uni.cat.4.aPD1 <- loadRData(paste0(survivalOutData,"uni.cat.4.aPD1",Rdata.suffix))
### MULTIVARIATE
## CAT
multi.cat.meta.aPD1<- loadRData(paste0(survivalOutData,"multi.cat.meta.aPD1",Rdata.suffix))
multi.cat.4.aPD1 <-loadRData(paste0(survivalOutData,"multi.cat.4.aPD1",Rdata.suffix))
# Get lists with HRs merge them
uni.cat.4.aPD1.hrlist <- sapply(uni.cat.4.aPD1, function(x) x$res.df, simplify = FALSE)
uni.cat.4.aPD1.hrDF <- ldply(uni.cat.4.aPD1.hrlist,id="tissue")
colnames(uni.cat.4.aPD1.hrDF)[1] <- "project"
multi.cat.4.aPD1.hrlist <- sapply(multi.cat.4.aPD1, function(x) x$res.df, simplify = FALSE)
multi.cat.4.aPD1.hrDF <- ldply(multi.cat.4.aPD1.hrlist,id="tissue")
colnames(multi.cat.4.aPD1.hrDF)[1] <- "project"
uni.cat.4.aPD1.hrDF$project <- ifelse(uni.cat.4.aPD1.hrDF$project=="melanoma", "SKCM anti-PD-1/L1",
ifelse(uni.cat.4.aPD1.hrDF$project=="RCC","KIRC anti-PD-1/L1",
ifelse(uni.cat.4.aPD1.hrDF$project=="bladder","BLCA anti-PD-1/L1", ifelse(uni.cat.4.aPD1.hrDF$project=="gastric","STAD anti-PD-1/L1",uni.cat.4.aPD1.hrDF$project))))
uni.cat.4.aPD1.hrDF$analysis <- rep("uni",nrow(uni.cat.4.aPD1.hrDF))
multi.cat.4.aPD1.hrDF$project <- ifelse(multi.cat.4.aPD1.hrDF$project=="melanoma", "SKCM anti-PD-1/L1",
ifelse(multi.cat.4.aPD1.hrDF$project=="RCC","KIRC anti-PD-1/L1",
ifelse(multi.cat.4.aPD1.hrDF$project=="bladder","BLCA anti-PD-1/L1", ifelse(multi.cat.4.aPD1.hrDF$project=="gastric","STAD anti-PD-1/L1",multi.cat.4.aPD1.hrDF$project))))
multi.cat.4.aPD1.hrDF$analysis <- rep("multi",nrow(multi.cat.4.aPD1.hrDF))
# Merge
categorical.4.aPD1 <- rbind(uni.cat.4.aPD1.hrDF,multi.cat.4.aPD1.hrDF)
categorical.4.aPD1$analysis <- factor(categorical.4.aPD1$analysis,levels=c("uni","multi"))
analysisLabels <- c("Univariable","multivariate")
# significant
categorical.4.aPD1$sign <- ifelse(categorical.4.aPD1$p <= 0.05,"sign","notsign")
categorical.4.aPD1$sign <- factor(categorical.4.aPD1$sign, levels = c("sign","notsign"))
fillLabels <- c("significant","non-significant")
categorical.4.aPD1$factor.name <- ifelse(categorical.4.aPD1$factor.name=="CD8+ T cells infiltration",
"CD8+ T-cells infiltration",
ifelse(categorical.4.aPD1$factor.name=="CD4+ T cells infiltration","CD4+ T-cells infiltration",categorical.4.aPD1$factor.name))
categorical.4.aPD1$factor.name <- factor(categorical.4.aPD1$factor.name, levels = featuresInLabels)
## Look at max value, WE HAVE ISSUE
which(categorical.4.aPD1$HR==max(categorical.4.aPD1$HR, na.rm = T))
## [1] 33
categorical.4.aPD1[which(categorical.4.aPD1$HR==max(categorical.4.aPD1$HR, na.rm = T)),]
## project factor.id factor.name factor.value
## 33 STAD anti-PD-1/L1 CD4cells:high CD4+ T-cells infiltration high
## HR Lower_CI Upper_CI Inv_HR Inv_Lower_CI Inv_Upper_CI p
## 33 2.139152 0.904116 5.061267 0.4674749 0.197579 1.106053 0.08352928
## analysis sign
## 33 uni notsign
categorical.4.aPD1$HR[which(categorical.4.aPD1$HR==max(categorical.4.aPD1$HR, na.rm = T))] <-Inf
# Fix
categorical.4.aPD1$project <- factor(categorical.4.aPD1$project, levels = paste0(mycancertypes.final," anti-PD-1/L1"))
# Load aPD1 bootstrapped
multi.boot.aPD1 <- loadRData(paste0(survivalOutData,"multi.cat.BOOT.aPD1",Rdata.suffix))
# Load TCGA bootstrapped
multi.boot.TCGA <- loadRData(paste0(survivalOutData,"multi.cat.BOOT.TCGA",Rdata.suffix))
# Select objects with bootstrapped CIs and pvalues
multi.cat.4.aPD1.hrlist <- sapply(multi.boot.aPD1, function(x) x$bootCIpval, simplify = FALSE)
multi.cat.4.aPD1.hrDF <- ldply(multi.cat.4.aPD1.hrlist,id="project")
colnames(multi.cat.4.aPD1.hrDF)[1] <- "project"
multi.cat.4.aPD1.hrDF$analysis <- rep("Multivariable",nrow(multi.cat.4.aPD1.hrDF))
multi.cat.4.aPD1.hrDF$project <- ifelse(multi.cat.4.aPD1.hrDF$project=="melanoma", "SKCM",
ifelse(multi.cat.4.aPD1.hrDF$project=="RCC","KIRC",
ifelse(multi.cat.4.aPD1.hrDF$project=="bladder","BLCA", ifelse(multi.cat.4.aPD1.hrDF$project=="gastric","STAD",multi.cat.4.aPD1.hrDF$project))))
################################
# Merge: Uni TCGA, Uni antiPD1, multi ALL
uni.cat.4.tcga.hrDF$cohort <- rep("TCGA", nrow(uni.cat.4.tcga.hrDF))
uni.cat.4.tcga.hrDF$analysis <- "Univariable"
uni.cat.4.tcga.hrDF$analysis <- paste0(uni.cat.4.tcga.hrDF$analysis, " - ", uni.cat.4.tcga.hrDF$cohort)
uni.cat.4.aPD1.hrDF$cohort <- rep("anti-PD-1/L1", nrow(uni.cat.4.aPD1.hrDF))
uni.cat.4.aPD1.hrDF$analysis <- "Univariable"
uni.cat.4.aPD1.hrDF$project <-str_replace(uni.cat.4.aPD1.hrDF$project," anti-PD-1/L1","")
uni.cat.4.aPD1.hrDF$analysis <- paste0(uni.cat.4.aPD1.hrDF$analysis, " - ", uni.cat.4.aPD1.hrDF$cohort)
multi.cat.4.aPD1.hrDF$cohort <- "anti-PD-1/L1"
multi.cat.4.aPD1.hrDF$analysis <- paste0(multi.cat.4.aPD1.hrDF$analysis, " - ", multi.cat.4.aPD1.hrDF$cohort)
####
categorical.4.tcga.aPD1 <- rbind(uni.cat.4.tcga.hrDF[,-c(4,8,9,10,13)],uni.cat.4.aPD1.hrDF[,-c(4,8,9,10,13)],multi.cat.4.aPD1.hrDF[,-c(8,10)])
categorical.4.tcga.aPD1$analysis <- factor(categorical.4.tcga.aPD1$analysis,levels=c("Univariable - TCGA","Univariable - anti-PD-1/L1", "Multivariable - anti-PD-1/L1"))
analysisLabels <- c("Univariable - TCGA","Univariable - anti-PD-1/L1", "Multivariable - anti-PD-1/L1")
##
categorical.4.tcga.aPD1$signif. <- ifelse(categorical.4.tcga.aPD1$p <= 0.05,"p <= 0.05",
ifelse(categorical.4.tcga.aPD1$p <= 0.1,"p <= 0.1",
"ns"))
categorical.4.tcga.aPD1$signif. <- factor(categorical.4.tcga.aPD1$signif., levels = c("p <= 0.1","p <= 0.05","ns"))
fillLabels <- c("p <= 0.1","p <= 0.05","ns")
##
categorical.4.tcga.aPD1$factor.name <- ifelse(categorical.4.tcga.aPD1$factor.name=="CD8+ T cells infiltration",
"CD8+ T-cells infiltration",
ifelse(categorical.4.tcga.aPD1$factor.name=="CD4+ T cells infiltration","CD4+ T-cells infiltration",categorical.4.tcga.aPD1$factor.name))
categorical.4.tcga.aPD1$factor.name <- factor(categorical.4.tcga.aPD1$factor.name, levels = featuresInLabels)
categorical.4.tcga.aPD1$project <- factor(categorical.4.tcga.aPD1$project, levels = mycancertypes.final)
categorical.4.tcga.aPD1$signif.<-factor(categorical.4.tcga.aPD1$signif., levels = c("ns","p <= 0.1","p <= 0.05"))
fig4 <-ggdraw() +
draw_plot(pOrr,x = 0, y = 0.42, width = .98, height = .58) +
draw_plot(p.m,x = 0.0, y = 0, width = .97, height = .39) +
draw_plot_label(label = c("C"), size = 16,
x = c(0), y = c(.4),family = "Palatino Linotype")
fig4
cairo_pdf(filename = paste0(figuresPath,"Figure 4",".pdf"),width = 14, height=13)
print(fig4)
dev.off()
## png
## 2
# ggsave(fig4, filename = paste0(figuresPath,"Figure 4", ".tiff"), dpi = 600,width = 14, height=13,
# units = "in")
supp5 <-ggdraw() +
draw_plot(pBub_pdl1.filt.red,x = 0, y = .5, width = .99, height = .45) +
draw_plot(pBub_cd8.filt.red,x = 0, y = 0, width = .99, height = .45) +
draw_plot_label(label = c("A: PD-L1 high vs low", "B: CD8+ T cells infiltration high vs low"), size = 16,
x = c(-.035,-.07), y = c(.98,.48),family = "Palatino Linotype")
supp5
cairo_pdf(filename = paste0(figuresPath,"Supplemental Figure S4",".pdf"),width = 24, height=21)
print(supp5)
dev.off()
## png
## 2
#
#
# ggsave(supp5, filename = paste0(figuresPath,"Supplemental Figure S4",".tiff"), dpi = 600,width = 24, height=21,
# units = "in")
png
2
[1] “>>Final total 449 patients included<<” [1] “LOCAL MEDIAN - SKCM - TCR_Richness: 13.5” [1] “>>Final total 535 patients included<<” [1] “LOCAL MEDIAN - KIRC - TCR_Richness: 33” [1] “>>Final total 411 patients included<<” [1] “LOCAL MEDIAN - BLCA - TCR_Richness: 5” [1] “>>Final total 443 patients included<<” [1] “LOCAL MEDIAN - STAD - TCR_Richness: 95” [1] “>>Final total 449 patients included<<” [1] “LOCAL MEDIAN - SKCM - TCR_Richness: 13.5” [1] “>>Final total 535 patients included<<” [1] “LOCAL MEDIAN - KIRC - TCR_Richness: 33” [1] “>>Final total 411 patients included<<” [1] “LOCAL MEDIAN - BLCA - TCR_Richness: 5” [1] “>>Final total 443 patients included<<” [1] “LOCAL MEDIAN - STAD - TCR_Richness: 95” [1] “>>Final total 449 patients included<<” [1] “LOCAL MEDIAN - SKCM - BCR_Richness: 24.5” [1] “>>Final total 535 patients included<<” [1] “LOCAL MEDIAN - KIRC - BCR_Richness: 6” [1] “>>Final total 411 patients included<<” [1] “LOCAL MEDIAN - BLCA - BCR_Richness: 9” [1] “>>Final total 443 patients included<<” [1] “LOCAL MEDIAN - STAD - BCR_Richness: 121” [1] “>>Final total 449 patients included<<” [1] “LOCAL MEDIAN - SKCM - BCR_Richness: 24.5” [1] “>>Final total 535 patients included<<” [1] “LOCAL MEDIAN - KIRC - BCR_Richness: 6” [1] “>>Final total 411 patients included<<” [1] “LOCAL MEDIAN - BLCA - BCR_Richness: 9” [1] “>>Final total 443 patients included<<” [1] “LOCAL MEDIAN - STAD - BCR_Richness: 121” [1] “>>Final total 220 patients included<<” [1] “LOCAL MEDIAN - SKCM anti-PD-1/L1 - TCR_Richness: 20” [1] “>>Final total 88 patients included<<” [1] “LOCAL MEDIAN - KIRC anti-PD-1/L1 - TCR_Richness: 21.5” [1] “>>Final total 344 patients included<<” [1] “LOCAL MEDIAN - BLCA anti-PD-1/L1 - TCR_Richness: 10” [1] “>>Final total 43 patients included<<” [1] “LOCAL MEDIAN - STAD anti-PD-1/L1 - TCR_Richness: 42” [1] “>>Final total 220 patients included<<” [1] “LOCAL MEDIAN - SKCM anti-PD-1/L1 - TCR_Richness: 20” [1] “>>Final total 88 patients included<<” [1] “LOCAL MEDIAN - KIRC anti-PD-1/L1 - TCR_Richness: 21.5” [1] “>>Final total 344 patients included<<” [1] “LOCAL MEDIAN - BLCA anti-PD-1/L1 - TCR_Richness: 10” [1] “>>Final total 43 patients included<<” [1] “LOCAL MEDIAN - STAD anti-PD-1/L1 - TCR_Richness: 42” [1] “>>Final total 220 patients included<<” [1] “LOCAL MEDIAN - SKCM anti-PD-1/L1 - BCR_Richness: 86.5” [1] “>>Final total 88 patients included<<” [1] “LOCAL MEDIAN - KIRC anti-PD-1/L1 - BCR_Richness: 89” [1] “>>Final total 344 patients included<<” [1] “LOCAL MEDIAN - BLCA anti-PD-1/L1 - BCR_Richness: 200” [1] “>>Final total 43 patients included<<” [1] “LOCAL MEDIAN - STAD anti-PD-1/L1 - BCR_Richness: 1980” [1] “>>Final total 220 patients included<<” [1] “LOCAL MEDIAN - SKCM anti-PD-1/L1 - BCR_Richness: 86.5” [1] “>>Final total 88 patients included<<” [1] “LOCAL MEDIAN - KIRC anti-PD-1/L1 - BCR_Richness: 89” [1] “>>Final total 344 patients included<<” [1] “LOCAL MEDIAN - BLCA anti-PD-1/L1 - BCR_Richness: 200” [1] “>>Final total 43 patients included<<” [1] “LOCAL MEDIAN - STAD anti-PD-1/L1 - BCR_Richness: 1980”
png
2
## SURVIVAL DATA
tcga.SURV <- sapply(featuresIn[3:9], function(x) loadRData(paste0(survivalOutData, paste0("tcga.SURV","_","OS","_",x),Rdata.suffix))$km,simplify = FALSE)
aPD1.SURV <- sapply(featuresIn[3:9], function(x) loadRData(paste0(survivalOutData, paste0("aPD1.SURV","_","OS","_",x),Rdata.suffix))$km,simplify = FALSE)
res.km <-sapply(featuresIn[3:9], function(x) survComboKM(tcga.SURV,aPD1.SURV,x), simplify = FALSE)
supp7 <-ggarrange(plotlist=res.km[1:3],
labels = paste0(c("A: ", "B: ", "C: "),featuresInLabels[3:5],c(" TCGA vs anti-PD1/L1", " TCGA vs anti-PD1/L1", " TCGA vs anti-PD1/L1")),#c("A", "B", "C")
ncol = 1, nrow = 3,
label.x=c(-0.105,-0.135,-0.142),
font.label = list(size = 14, color = "black", face = "bold", family = "Palatino Linotype"))#,c(" Univariable", " Univariable", " Univariable")
supp7
cairo_pdf(filename = paste0(figuresPath,"Supplemental Figure S7",".pdf"),family = "Palatino Linotype", width = 14, height=22)
print(supp7)
dev.off()
png 2
# ggsave(supp7, filename = paste0(figuresPath,"Supplemental Figure S7", ".tiff"), dpi = 600,width = 14,
# height = 22,
# units = "in")
supp8 <-ggarrange(plotlist=res.km[4:7],
labels = paste0(c("A: ", "B: ", "C: ","D: "),featuresInLabels[6:9],c(" TCGA vs anti-PD1/L1", " TCGA vs anti-PD1/L1", " TCGA vs anti-PD1/L1")),
ncol = 1, nrow = 4,
label.x=c(-0.155,-0.155,-0.135,-0.123),
font.label = list(size = 14, color = "black", face = "bold", family = "Palatino Linotype"))
supp8
cairo_pdf(filename = paste0(figuresPath,"Supplemental Figure S8",".pdf"),family = "Palatino Linotype", width = 14, height=26)
print(supp8)
dev.off()
png 2
# ggsave(supp8, filename = paste0(figuresPath,"Supplemental Figure S8",".tiff"), dpi = 600,width = 14,
# height = 26,
# units = "in")
modelTypeA <- "_TCRArichnessIGH.ds"
modelTypeB <- "_noLIU"
modelTypeC <- "_mintcr4bcr10"
###
## DATA
###
# Keep samples with TMB (.tmb)
immdata.TcrBcr.full.red.sub.tmb <- immdata.TcrBcr.full.red.sub %>% dplyr::filter(complete.cases(logTMB)) %>% droplevels()#;nrow(immdata.TcrBcr.full.red.sub.tmb)
# ONLY MELANOMA, WITH TMB (.tmb.MEL)
immdata.TcrBcr.full.red.sub.tmb.MEL <- immdata.TcrBcr.full.red.sub.tmb %>% dplyr::filter(tissue=="melanoma") %>% droplevels()#;nrow(immdata.TcrBcr.full.red.sub.tmb.MEL)
## ALL MELANOMA AND BLADDER, WITH TMB (.tmb.ALL)
immdata.TcrBcr.full.red.sub.tmb.ALL <- immdata.TcrBcr.full.red.sub.tmb %>% dplyr::filter(tissue %in% c("melanoma","bladder")) %>% droplevels()#;nrow(immdata.TcrBcr.full.red.sub.tmb.ALL)
# ALL DATA, MELANOMA AND BLADDER, BUT LEAVE MISSING TMB (.ALL)
immdata.TcrBcr.full.red.sub.ALL <- immdata.TcrBcr.full.red.sub %>% dplyr::filter(tissue %in% c("melanoma","bladder")) %>% droplevels()#;nrow(immdata.TcrBcr.full.red.sub.ALL)
## INSTEAD OF FILTERING FOR NO MISSING TMB, I DO THAT FOR ALL COVARIATES OF INTEREST
# Remove rows that have outliers/missing values since I get errors with Knn imputation
immdata.TcrBcr.full.red.sub.ALL.filt <- immdata.TcrBcr.full.red.sub.ALL %>% filter_at(vars(covariatesIn), all_vars(complete.cases(.))) %>% droplevels()
# DOING THE SAME FOR THE ORIGINAL DATA - USE THIS WHEN TESTING
# these data include other tissues apart from melanoma and bladder
immdata.TcrBcr.full.red.sub.filt <- immdata.TcrBcr.full.red.sub %>% filter_at(vars(covariatesIn), all_vars(complete.cases(.))) %>% droplevels()
# FINAL DATA FOR MODELLING
# Remove the Liu dataset for the model testing
immdata.TcrBcr.BLAD.MEL.ready <-immdata.TcrBcr.full.red.sub.ALL.filt %>% dplyr::filter(dataset %notin% c("Liu") ) %>% droplevels() # All melanoma & bladder, PD, CR, PR, complete data
immdata.TcrBcr.ALL.ready <- immdata.TcrBcr.full.red.sub.filt %>% dplyr::filter(dataset %notin% c("Liu") ) %>% droplevels() # All tissues, PD, CR, PR, complete data
## For gastric
immdata.TcrBcr.full.red.sub.filt.gastric <- immdata.TcrBcr.full.red.sub %>% dplyr::filter(tissue %in% c("gastric") ) %>%
filter_at(vars(covariatesIn[-6]), all_vars(complete.cases(.))) %>% droplevels()
Load test data for predictive models
#################
## LOAD TEST DATA FOR MODELS
testDF <- loadRData(paste0(testDataOut,"testDF",modelTypeA,modelTypeB, modelTypeC,".RData"))
data.testX <- as.data.frame(testDF[[1]][,which(colnames(testDF[[1]]) %in% covariatesIn)])
## for gastric
testDF.gastric <-loadRData(paste0(testDataOut,"testDF.gastric",modelTypeA,modelTypeB, modelTypeC,".RData"))
# Addind Liu dataset
test.ALL <- rbind(testDF[[1]],immdata.TcrBcr.full.red.sub.ALL.filt %>% dplyr::filter(dataset %in% c("Liu") ) %>% droplevels())
data.testX.ALL <- as.data.frame(test.ALL[,which(colnames(test.ALL) %in% covariatesIn)])
test.DF.melanoma <- test.ALL %>% dplyr::filter(tissue %in% c("melanoma")) %>% droplevels()
data.testX.melanoma <- as.data.frame(test.DF.melanoma[,which(colnames(test.DF.melanoma) %in% covariatesIn)])
test.DF.bladder <- testDF[[1]] %>% dplyr::filter(tissue %in% c("bladder")) %>% droplevels()
data.testX.bladder <- as.data.frame(test.DF.bladder[,which(colnames(test.DF.bladder) %in% covariatesIn)])
test.DF.rcc <- testDF[[1]] %>% dplyr::filter(tissue %in% c("RCC")) %>% droplevels()
data.testX.rcc <- as.data.frame(test.DF.rcc[,which(colnames(test.DF.rcc) %in% covariatesIn)])
test.DF.gastric <- testDF.gastric
data.testX.gastric <- as.data.frame(test.DF.gastric[,which(colnames(test.DF.gastric) %in% covariatesIn[-6])])
# LOAD FINAL MODELS
models.final <- loadRData(paste0(modelsInterm,"models.final",modelTypeA,modelTypeB, modelTypeC,".RData"))
sigs <- c("TCRArichness.ds", "logTMB","PDL1","BCR.IGHrichness.ds","TIS_sigscore",
paste(c("TCRArichness.ds","BCR.IGHrichness.ds"),collapse = " + "),
paste(c("TCRArichness.ds", "logTMB"),collapse = " + "),
paste(c("TCRArichness.ds", "PDL1"),collapse = " + "),
paste(c("TCRArichness.ds", "TIS_sigscore"),collapse = " + "),
paste(c("BCR.IGHrichness.ds", "logTMB"),collapse = " + "),
paste(c("BCR.IGHrichness.ds", "PDL1"),collapse = " + "),
paste(c("BCR.IGHrichness.ds", "TIS_sigscore"),collapse = " + "),
paste(c("logTMB", "PDL1"),collapse = " + "),
paste(c("logTMB", "TIS_sigscore"),collapse = " + "),
paste(c("PDL1","TIS_sigscore"),collapse = " + "),
# TRI
paste(c("TCRArichness.ds","BCR.IGHrichness.ds","logTMB"),collapse = " + "),
paste(c("TCRArichness.ds","BCR.IGHrichness.ds","PDL1"),collapse = " + "),
paste(c("TCRArichness.ds","BCR.IGHrichness.ds","TIS_sigscore"),collapse = " + "),
paste(c("TCRArichness.ds","logTMB","PDL1"),collapse = " + "),
paste(c("TCRArichness.ds","logTMB","TIS_sigscore"),collapse = " + "),
paste(c("TCRArichness.ds","PDL1","TIS_sigscore"),collapse = " + "),
paste(c("BCR.IGHrichness.ds","logTMB","PDL1"),collapse = " + "),
paste(c("BCR.IGHrichness.ds","logTMB","TIS_sigscore"),collapse = " + "),
paste(c("BCR.IGHrichness.ds","PDL1","TIS_sigscore"),collapse = " + "),
paste(c("logTMB","PDL1","TIS_sigscore"),collapse = " + "),
#QUADRA
paste(c("TCRArichness.ds","BCR.IGHrichness.ds","logTMB","PDL1"),collapse = " + "),
paste(c("TCRArichness.ds","BCR.IGHrichness.ds","logTMB","TIS_sigscore"),collapse = " + "),
paste(c("TCRArichness.ds","BCR.IGHrichness.ds","PDL1","TIS_sigscore"),collapse = " + "),
paste(c("TCRArichness.ds","logTMB","PDL1","TIS_sigscore"),collapse = " + "),
paste(c("BCR.IGHrichness.ds", "logTMB","PDL1","TIS_sigscore"),collapse = " + "),
#ALL
paste(c("TCRArichness.ds","BCR.IGHrichness.ds","logTMB","PDL1","TIS_sigscore"),collapse = " + "),
"all predictors")
sigs_axisNames<- c(rep("Univariate Logistic Regression",5),
rep("Bivariate Logistic Regression",10),
rep("Trivariate Logistic Regression",10),
rep("Quadravariate Logistic Regression",5),
rep("5-variate Logistic Regression",1),
"RFE with Naive Bayes")
sigs_axisNames<-names(models.final)
sigs_titleNames<- names(models.final)
ClassLevels = c("CRPR","PD")
rfeMode <- c(rep(FALSE,length(models.final)-1),
TRUE)
indexUniRFE <-c(1:5,32)
indexUni<- c(1:5)
indexRFE <- 6
indexBiv <-c(6:15)
indexTriv<-c(16:25)
indexQuad5<-c(26:32)
indexGASTRIC <-c(1,2,4,5)
test.DF.melanoma <- test.ALL %>% dplyr::filter(tissue %in% c("melanoma")) %>% droplevels()
data.testX.melanoma <- as.data.frame(test.DF.melanoma[,which(colnames(test.DF.melanoma) %in% covariatesIn)])
test.DF.bladder <- testDF[[1]] %>% dplyr::filter(tissue %in% c("bladder")) %>% droplevels()
data.testX.bladder <- as.data.frame(test.DF.bladder[,which(colnames(test.DF.bladder) %in% covariatesIn)])
test.DF.rcc <- testDF[[1]] %>% dplyr::filter(tissue %in% c("RCC")) %>% droplevels()
data.testX.rcc <- as.data.frame(test.DF.rcc[,which(colnames(test.DF.rcc) %in% covariatesIn)])
test.DF.gastric <- immdata.TcrBcr.full.red.sub %>% dplyr::filter(tissue %in% c("gastric")) %>% distinct() %>% droplevels()
data.testX.gastric <- as.data.frame(test.DF.gastric[,which(colnames(test.DF.gastric) %in% covariatesIn[-6])])
models.gastric <-models.final[c(1:2,4:5,6,8:9,11:12,15,17:18,21,24,28)]
names(models.gastric) <- c('M01: R ~ TCRa richness','M02: R ~ BCRigh richness',
# "M3: R ~ logTMB",
'M04: R ~ PD-L1',
'M05: R ~ TIS GEP',
#BIVAR
"M06 (LR): R ~ TCR richness + BCR richness",
# "M7 (LR): R ~ TCR richness + logTMB",
"M08 (LR): R ~ TCR richness + PD-L1",
"M09 (LR): R ~ TCR richness + TIS GEP",
# "M10 (LR): R ~ BCR richness + logTMB",
"M11 (LR): R ~ BCR richness + PD-L1",
"M12 (LR): R ~ BCR richness + TIS GEP",
# "M13 (LR): R ~ logTMB + PD-L1",
# "M14 (LR): R ~ logTMB + TIS GEP",
"M15 (LR): R ~ PD-L1 + TIS GEP",
#TRIVAR
# "M16 (LR): R ~ TCR richness + BCR richness + logTMB",
"M17 (LR): R ~ TCR richness + BCR richness + PD-L1",
"M18 (LR): R ~ TCR richness + BCR richness + TIS GEP",
# "M19 (LR): R ~ TCR richness + logTMB + PD-L1",
# "M20 (LR): R ~ TCR richness + logTMB + TIS GEP",
"M21 (LR): R ~ TCR richness + PD-L1 + TIS GEP",
# "M22 (LR): R ~ BCR richness + logTMB + PD-L1",
# "M23 (LR): R ~ BCR richness + logTMB + TIS GEP",
"M24 (LR): R ~ BCR richness + PD-L1 + TIS GEP",
# "M25 (LR): R ~ logTMB + PD-L1 + TIS GEP",
#QUADRAVAR
# "M26 (LR): R ~ TCR richness + BCR richness + logTMB + PD-L1",
# "M27 (LR): R ~ TCR richness + BCR richness + logTMB + TIS GEP",
"M28 (LR): R ~ TCR richness + BCR richness + PD-L1 + TIS GEP"
# "M29 (LR): R ~ TCR richness + logTMB + PD-L1 + TIS GEP",
# "M30 (LR): R ~ BCR richness + logTMB + PD-L1 + TIS GEP",
# "M31 (LR): R ~ TCR richness + BCR richness + logTMB + PD-L1 + TIS GEP",
)
sigs <- c("TCRArichness.ds", "BCR.IGHrichness.ds","PDL1","TIS_sigscore",
paste(c("TCRArichness.ds","BCR.IGHrichness.ds"),collapse = " + "),
# paste(c("TCRArichness.ds", "logTMB"),collapse = " + "),
paste(c("TCRArichness.ds", "PDL1"),collapse = " + "),
paste(c("TCRArichness.ds", "TIS_sigscore"),collapse = " + "),
# paste(c("BCR.IGHrichness.ds", "logTMB"),collapse = " + "),
paste(c("BCR.IGHrichness.ds", "PDL1"),collapse = " + "),
paste(c("BCR.IGHrichness.ds", "TIS_sigscore"),collapse = " + "),
# paste(c("logTMB", "PDL1"),collapse = " + "),
# paste(c("logTMB", "TIS_sigscore"),collapse = " + "),
paste(c("PDL1","TIS_sigscore"),collapse = " + "),
# TRI
# paste(c("TCRArichness.ds","BCR.IGHrichness.ds","logTMB"),collapse = " + "),
paste(c("TCRArichness.ds","BCR.IGHrichness.ds","PDL1"),collapse = " + "),
paste(c("TCRArichness.ds","BCR.IGHrichness.ds","TIS_sigscore"),collapse = " + "),
# paste(c("TCRArichness.ds","logTMB","PDL1"),collapse = " + "),
# paste(c("TCRArichness.ds","logTMB","TIS_sigscore"),collapse = " + "),
paste(c("TCRArichness.ds","PDL1","TIS_sigscore"),collapse = " + "),
# paste(c("BCR.IGHrichness.ds","logTMB","PDL1"),collapse = " + "),
# paste(c("BCR.IGHrichness.ds","logTMB","TIS_sigscore"),collapse = " + "),
paste(c("BCR.IGHrichness.ds","PDL1","TIS_sigscore"),collapse = " + "),
# paste(c("logTMB","PDL1","TIS_sigscore"),collapse = " + "),
#QUADRA
# paste(c("TCRArichness.ds","BCR.IGHrichness.ds","logTMB","PDL1"),collapse = " + "),
# paste(c("TCRArichness.ds","BCR.IGHrichness.ds","logTMB","TIS_sigscore"),collapse = " + "),
paste(c("TCRArichness.ds","BCR.IGHrichness.ds","PDL1","TIS_sigscore"),collapse = " + ")
# paste(c("TCRArichness.ds","logTMB","PDL1","TIS_sigscore"),collapse = " + "),
# paste(c("BCR.IGHrichness.ds", "logTMB","PDL1","TIS_sigscore"),collapse = " + "),
#ALL
# paste(c("TCRArichness.ds","BCR.IGHrichness.ds","logTMB","PDL1","TIS_sigscore"),collapse = " + "),
# "all predictors"
)
sigs_axisNames<- c(rep("Univariate Logistic Regression",4),
rep("Bivariate Logistic Regression",6),
rep("Trivariate Logistic Regression",4),
rep("Quadravariate Logistic Regression",1)
)
sigs_axisNames<-names(models.gastric)
sigs_titleNames<- names(models.gastric)
rocs_schemes <- list(`BLCA+\nSKCM+\nKIRC\n(110) `=roc_values,
`BLCA\n(59) ` = roc_values.blad,
`SKCM\n(95) ` = roc_values.mel,
`KIRC\n(31) ` = roc_values.rcc,
`STAD\n(29) ` = roc_values.gas,
`BLCA\n(53) ` = roc_values.blad.spec,
`SKCM\n(31) ` = roc_values.mel.spec)
# Change column names
rocs_schemes <- sapply(rocs_schemes, function(x) x %>%
dplyr::select(-dsids,-curvetypes,-NOfeaturesOut,-optPredictors) %>%
setNames(c("Model", "ROC-AUC", "Trained_on", "Tested_on","Sample_test")), simplify=FALSE)
schemesTitles <- c("Scheme 1 - Histology non-specific models",
"Scheme 1 - Histology non-specific models",
"Scheme 1 - Histology non-specific models",
"Scheme 1 - Histology non-specific models",
"Scheme 1 - Histology non-specific models",
"Scheme 2 - BLCA-specific models",
"Scheme 3 - SKCM-specific models")
trainedOn <- c("Trained on BLCA, SKCM (excl. Liu et al. dataset) anti-PD-1/L1 datasets",
"Trained on BLCA, SKCM (excl. Liu et al. dataset) anti-PD-1/L1 datasets",
"Trained on BLCA, SKCM (excl. Liu et al. dataset) anti-PD-1/L1 datasets",
"Trained on BLCA, SKCM (excl. Liu et al. dataset) anti-PD-1/L1 datasets",
"Trained on BLCA, SKCM (excl. Liu et al. dataset) anti-PD-1/L1 datasets",
"Trained on BLCA anti-PD-1/L1 datasets",
"Trained on SKCM (incl. Liu et al. dataset) anti-PD-1/L1 datasets")
testedOn <- c("Tested on BLCA, SKCM (excl. Liu et al. dataset) and KIRC anti-PD-1/L1 datasets",
"Tested on BLCA anti-PD-1/L1 datasets",
"Tested on SKCM anti-PD-1/L1 datasets",
"Tested on external validation KIRC anti-PD-1/L1 dataset",
"Tested on one external validation STAD anti-PD-1/L1 dataset",
"Tested on BLCA anti-PD-1/L1 dataset",
"Tested on SKCM anti-PD-1/L1 dataset")
schemeSheet <- c("S1-All",
"S1-BLCA",
"S1-SKCM",
"S1-KIRC",
"S1-STAD",
"S2-BLCA specific",
"S3-SKCM specific")
wb <- createWorkbook()
# condition="BCR-"
for(i in seq(1:7)){
# i=1
# Create sheet
addWorksheet(wb, schemeSheet[i])
# Start writing title
# First
writeData(wb, sheet=schemeSheet[i], "Supplementary table 4. ROC-AUC values of univariable models of established biomarkers (logTMB, PD-L1, GEP), TCR richness, BCR richness compared to the bivariable and multivariable models (including the Recursive Feature Elimination model). Multivariable models include additional potential biomarkers CD8+ and CD4+ T-cells infiltration, B-cells infiltration and Tumor Purity. Models were evaluated on separate test data.", startCol = 1, startRow = 1)
addStyle(wb = wb, sheet=schemeSheet[i], rows = 1, cols = 1, style = TitleStyle1)
# Second
writeData(wb, sheet=schemeSheet[i], schemesTitles[i], startCol = 1, startRow = 3)
addStyle(wb = wb, sheet=schemeSheet[i], rows = 3, cols = 1, style = TitleStyle2)
# Third
writeData(wb, sheet=schemeSheet[i], trainedOn[i], startCol = 1, startRow = 4)
addStyle(wb = wb, sheet=schemeSheet[i], rows = 4, cols = 1, style = TitleStyle3)
# Fourth
writeData(wb, sheet=schemeSheet[i], testedOn[i], startCol = 1, startRow = 5)
addStyle(wb = wb, sheet=schemeSheet[i], rows = 5, cols = 1, style = TitleStyle3)
# Write table woth data
writeData(wb,schemeSheet[i], rocs_schemes[[i]] %>% as.data.frame(), startCol = 1, startRow = 7, rowNames = FALSE,headerStyle = headerStyle1)
}
saveWorkbook(wb,paste0(projectOutDataPath,"/","Supplemental File 4.xlsx"), overwrite = TRUE)
rocs_schemes_merged_DF <- ldply(rocs_schemes)
keep_models <- unique(rocs_schemes_merged_DF$Model)[c(1:5,32:34)]
# keep_models
rocs_schemes_merged_DF.sub <- rocs_schemes_merged_DF %>% dplyr::filter(Model %in% keep_models) %>% droplevels()
colnames(rocs_schemes_merged_DF.sub)[1] <- "scheme"
rocs_schemes_merged_DF.sub$scheme <- factor(rocs_schemes_merged_DF.sub$scheme, levels=rev(rocs_schemes_merged_DF.sub$scheme %>% unique()))
rocs_schemes_merged_DF.sub$Model <- ifelse(rocs_schemes_merged_DF.sub$Model=="M01: R ~ TCRa richness", "TCR Richness",
ifelse(rocs_schemes_merged_DF.sub$Model=="M02: R ~ BCRigh richness", "BCR Richness",
ifelse(rocs_schemes_merged_DF.sub$Model=="M03: R ~ logTMB", "logTMB",
ifelse(rocs_schemes_merged_DF.sub$Model=="M04: R ~ PD-L1", "PD-L1",
ifelse(rocs_schemes_merged_DF.sub$Model=="M05: R ~ TIS GEP", "TIS GEP",ifelse(rocs_schemes_merged_DF.sub$Model=="M32 (RFE-NB): R ~ .", "RFE-optimal", ifelse(rocs_schemes_merged_DF.sub$Model=="M32 (RFE-SVM Radial): R ~ .", "RFE-optimal",ifelse(rocs_schemes_merged_DF.sub$Model=="M32 (RFE-SVM Linear)" , "RFE-optimal",rocs_schemes_merged_DF.sub$Model))))))))
rocs_schemes_merged_DF.sub$Model <- factor(rocs_schemes_merged_DF.sub$Model, levels=unique(rocs_schemes_merged_DF.sub$Model))
roc_meansByModel <-aggregate( `ROC-AUC` ~ Model, data = rocs_schemes_merged_DF.sub, FUN = mean) %>% arrange(desc(`ROC-AUC`))
roc_meansByModel
## Model ROC-AUC
## 1 RFE-optimal 0.7426960
## 2 TIS GEP 0.6462104
## 3 logTMB 0.6448493
## 4 TCR Richness 0.6412568
## 5 BCR Richness 0.6221735
## 6 PD-L1 0.5955789
rocs_schemes_merged_DF.sub$Model <- factor(rocs_schemes_merged_DF.sub$Model, levels=roc_meansByModel$Model %>% as.character() %>% rev())
fig5 <-myBubblePlot(rocs_schemes_merged_DF.sub,"Model", "scheme", "Sample_test", "ROC-AUC" )
fig5 <- fig5+theme(plot.margin = unit(c(0, 0, 0, 0), "cm"),axis.title=element_blank(),axis.ticks.length = unit(0, "pt"))
# Scheme names as panels
p2 <- tibble(ymin = c(0, 1,2), ymax = c(1,2, 7), fill = c("SKCM-specific\n(Scheme 3)", "BLCA-specific\n(Scheme 2)", "Histology non-specific\n(Scheme 1)")) %>%
ggplot() +
geom_rect(aes(xmin = 0, xmax = 1, ymin = ymin, ymax = ymax), color="grey50", fill="white") + #, fill = fill
geom_text(aes(x = .5, y = (ymin + ymax) / 2, label = fill, fontface="bold", family="Palatino Linotype"), angle = 90,size=4.5) +
# scale_y_reverse(breaks = seq(1, 10), expand = expansion(mult = c(0, 0))) +
scale_y_continuous(breaks = seq(1, 7), expand = expansion(mult = c(0, 0))) +
scale_x_continuous(breaks = c(0), expand = expansion(mult = c(0, 0))) +
guides(fill = FALSE) + labs(x="", y="") +
theme_bw(base_family = "Palatino Linotype", base_size = 20) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())
fig5.f <-p2 + fig5 + patchwork::plot_layout(widths = c(0.7, 9.3))
# fig5.f
# Model names as panels
p3 <- tibble(xmin = c(0, 1,2,3,4,5), xmax = c(1,2,3,4,5,6), fill = c("TCR Richness", "BCR Richness", "LogTMB", "PD-L1", "TIS-GEP", "RFE-optimal")) %>%
ggplot() +
geom_rect(aes(ymin = 0, ymax = 1, xmin = xmin, xmax = xmax), color="grey50", fill="white") + #, fill = fill
geom_text(aes(y = .5, x = (xmin + xmax) / 2, label = fill, fontface="bold", family="Palatino Linotype"), angle = 0,size=4.5) +
# scale_y_reverse(breaks = seq(1, 10), expand = expansion(mult = c(0, 0))) +
scale_x_continuous(breaks = seq(1, 10), expand = expansion(mult = c(0, 0))) +
scale_y_continuous(breaks = c(0), expand = expansion(mult = c(0, 0))) +
guides(fill = FALSE) + labs(x="", y="") +
theme_bw(base_family = "Palatino Linotype", base_size = 20) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),plot.margin = unit(c(0, 0, -2, 0), "cm"))
# p3
# Models type as panels
p4 <- tibble(xmin = c(0,4.8), xmax = c(4.8,6), fill = c("Univariable Models", "Multivariable Models")) %>%
ggplot() +
geom_rect(aes(ymin = 0, ymax = 1, xmin = xmin, xmax = xmax), color="grey50", fill="white") + #, fill = fill
geom_text(aes(y = .5, x = (xmin + xmax) / 2, label = fill, fontface="bold", family="Palatino Linotype"), angle = 0,size=4.5) +
# scale_y_reverse(breaks = seq(1, 10), expand = expansion(mult = c(0, 0))) +
#scale_x_continuous(breaks = seq(0, 6)) +
scale_x_continuous(breaks = seq(0, 6),limits=c(0,6), expand = expansion(mult = c(.2, 0))) +
scale_y_continuous(breaks = c(0), expand = expansion(mult = c(0, 0))) +
guides(fill = FALSE) + labs(x="", y="") +
theme_bw(base_family = "Palatino Linotype", base_size = 20) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),plot.margin = unit(c(-2, -2, -2, -2), "cm"))
# p4
p5 <- tibble(xmin = c(0,4.8), xmax = c(4.8,6), fill = c("Univariable Models", "Multivariable Models")) %>%
ggplot() +
geom_rect(aes(ymin = 0, ymax = 1, xmin = xmin, xmax = xmax), color="grey50", fill="white") + #, fill = fill
geom_text(aes(y = .5, x = (xmin + xmax) / 2, label = fill, fontface="bold", family="Palatino Linotype"), angle = 0,size=4.5) +
# scale_y_reverse(breaks = seq(1, 10), expand = expansion(mult = c(0, 0))) +
#scale_x_continuous(breaks = seq(0, 6)) +
scale_x_continuous(breaks = seq(0, 6),limits=c(0,6), expand = expansion(mult = c(0, 0))) +
scale_y_continuous(breaks = c(0), expand = expansion(mult = c(0, 0))) +
guides(fill = FALSE) + labs(x="", y="") +
theme_bw(base_family = "Palatino Linotype", base_size = 20) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),plot.margin = unit(c(-2, -2, -2, -2), "cm"),axis.title=element_blank(),axis.ticks.length = unit(0, "pt"))
# p5
# ROC AUC with models names as panels
fig5.final<-(patchwork::plot_spacer()+p5+ patchwork::plot_layout(widths = c(1.4, 8.6)))/(p2 + fig5 + patchwork::plot_layout(widths = c(0.7, 9.3))) + patchwork::plot_layout(heights = c(0.4, 9.6))
fig5.final
png 2
Produced using Visio.
melted_mat.all2 <- melted_mat.all %>% dplyr::filter(Var1 %in% keep_models) %>% dplyr::filter(Var2 %in% keep_models) %>% droplevels()
melted_mat.blad2 <- melted_mat.blad %>% dplyr::filter(Var1 %in% keep_models) %>% dplyr::filter(Var2 %in% keep_models) %>% droplevels()
melted_mat.mel2 <- melted_mat.mel %>% dplyr::filter(Var1 %in% keep_models) %>% dplyr::filter(Var2 %in% keep_models) %>% droplevels()
melted_mat.rcc2 <- melted_mat.rcc %>% dplyr::filter(Var1 %in% keep_models) %>% dplyr::filter(Var2 %in% keep_models) %>% droplevels()
melted_mat.gas2 <- melted_mat.gas %>% dplyr::filter(Var1 %in% keep_models) %>% dplyr::filter(Var2 %in% keep_models) %>% droplevels()
melted_mat.blad.spec$Var1 <- ifelse(as.character(melted_mat.blad.spec$Var1)=="M32 (RFE-SVM Radial): R ~ logTMB + PDL1 + BCR richness + TIS + B cells + TCR richness + CD8 cells + CD4 cells + Tumor Purity","M32 (RFE-SVM Radial): R ~ .",as.character(melted_mat.blad.spec$Var1))
melted_mat.blad.spec$Var2 <- ifelse(as.character(melted_mat.blad.spec$Var2)=="M32 (RFE-SVM Radial): R ~ logTMB + PDL1 + BCR richness + TIS + B cells + TCR richness + CD8 cells + CD4 cells + Tumor Purity","M32 (RFE-SVM Radial): R ~ .",as.character(melted_mat.blad.spec$Var2))
melted_mat.blad.spec2 <- melted_mat.blad.spec %>% dplyr::filter(Var1 %in% keep_models) %>% dplyr::filter(Var2 %in% keep_models) %>% droplevels()
melted_mat.mel.spec$Var1 <- ifelse(as.character(melted_mat.mel.spec$Var1)=="M32 (RFE-SVM Linear): R ~ logTMB + B cells + BCR richness + TCR richness + Tumor Purity","M32 (RFE-SVM Linear)",as.character(melted_mat.mel.spec$Var1))
melted_mat.mel.spec$Var2 <- ifelse(as.character(melted_mat.mel.spec$Var2)=="M32 (RFE-SVM Linear): R ~ logTMB + B cells + BCR richness + TCR richness + Tumor Purity","M32 (RFE-SVM Linear)",as.character(melted_mat.mel.spec$Var2))
melted_mat.mel.spec2 <- melted_mat.mel.spec %>% dplyr::filter(Var1 %in% keep_models) %>% dplyr::filter(Var2 %in% keep_models) %>% droplevels()
rocsCompare.scheme1_all <-corHeatmap2(melted_mat.all2)
rocsCompare.scheme1_blad <-corHeatmap2(melted_mat.blad2)
rocsCompare.scheme1_mel <-corHeatmap2(melted_mat.mel2)
rocsCompare.scheme1_rcc <-corHeatmap2(melted_mat.rcc2)
rocsCompare.scheme1_gas <-corHeatmap2(melted_mat.gas2)
rocsCompare.scheme2 <-corHeatmap2(melted_mat.blad.spec2)
rocsCompare.scheme3 <-corHeatmap2(melted_mat.mel.spec2)
png
2
session_info <- sessionInfo()
writeLines(capture.output(session_info), paste0(sessionInfoPath,"F_paper_figures_tables_files.txt"))
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats4 grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] officer_0.4.4 GOfuncR_1.14.0
## [3] vioplot_0.3.7 sm_2.2-5.7
## [5] openxlsx_4.2.5 DOSE_3.20.1
## [7] multienrichjam_0.0.62.900 GO.db_3.14.0
## [9] data.table_1.14.2 rrvgo_1.6.0
## [11] org.Hs.eg.db_3.14.0 AnnotationDbi_1.56.2
## [13] IRanges_2.28.0 S4Vectors_0.32.4
## [15] Biobase_2.54.0 BiocGenerics_0.40.0
## [17] clusterProfiler_4.2.2 intergraph_2.0-2
## [19] ggraph_2.0.5 tidygraph_1.2.2
## [21] ggnetwork_0.5.10 ggnet_0.1.0
## [23] corrr_0.4.4 xlsx_0.6.5
## [25] plyr_1.8.7 tidyquant_1.0.6
## [27] quantmod_0.4.20 TTR_0.24.3
## [29] PerformanceAnalytics_2.0.4 xts_0.12.1
## [31] zoo_1.8-10 lubridate_1.8.0
## [33] ggbeeswarm_0.6.0 rstatix_0.7.0
## [35] reshape2_1.4.4 readxl_1.4.0
## [37] flextable_0.7.0 reconPlots_0.1.0
## [39] precrec_0.12.9 caret_6.0-92
## [41] lattice_0.20-45 survminer_0.4.9
## [43] survival_3.3-1 pander_0.6.5
## [45] RColorBrewer_1.1-3 ggrepel_0.9.1
## [47] ggpubr_0.4.0 forcats_0.5.1
## [49] stringr_1.4.0 purrr_0.3.4
## [51] readr_2.1.2 tidyr_1.2.0
## [53] tidyverse_1.3.1 Cairo_1.5-15
## [55] cowplot_1.1.1 gridExtra_2.3
## [57] hrbrthemes_0.8.0 ggplot2_3.3.6
## [59] tibble_3.1.7 dplyr_1.0.9
## [61] DT_0.22 extrafont_0.18
## [63] pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] statnet.common_4.6.0 Hmisc_4.7-0 class_7.3-20
## [4] foreach_1.5.2 crayon_1.5.2 MASS_7.3-57
## [7] nlme_3.1-157 backports_1.4.1 reprex_2.0.1
## [10] GOSemSim_2.20.0 rlang_1.1.3 XVector_0.34.0
## [13] extrafontdb_1.0 BiocParallel_1.28.3 rjson_0.2.21
## [16] bit64_4.0.5 glue_1.6.2 pheatmap_1.0.12
## [19] parallel_4.1.0 vipor_0.4.5 haven_2.5.1
## [22] tidyselect_1.1.2 km.ci_0.5-6 xtable_1.8-4
## [25] magrittr_2.0.3 evaluate_0.18 gdtools_0.2.4
## [28] cli_3.6.2 zlibbioc_1.40.0 miniUI_0.1.1.1
## [31] rstudioapi_0.13 bslib_0.3.1 rpart_4.1.16
## [34] fastmatch_1.1-3 wordcloud_2.6 treeio_1.18.1
## [37] shiny_1.7.1 xfun_0.35 tm_0.7-8
## [40] clue_0.3-60 cluster_2.1.2 KEGGREST_1.34.0
## [43] mapplots_1.5.1 ape_5.6-2 listenv_0.8.0
## [46] xlsxjars_0.6.1 Biostrings_2.62.0 png_0.1-7
## [49] future_1.26.1 ipred_0.9-13 withr_2.5.0
## [52] bitops_1.0-7 slam_0.1-50 ggforce_0.3.3
## [55] cellranger_1.1.0 hardhat_1.2.0 pROC_1.18.0
## [58] coda_0.19-4 pillar_1.8.0 GlobalOptions_0.1.2
## [61] cachem_1.0.6 broom.helpers_1.9.0 fs_1.5.2
## [64] kernlab_0.9-30 NLP_0.2-1 GetoptLong_1.0.5
## [67] vctrs_0.4.1 ellipsis_0.3.2 generics_0.1.3
## [70] lava_1.6.10 tools_4.1.0 questionr_0.7.7
## [73] foreign_0.8-82 beeswarm_0.4.0 munsell_0.5.0
## [76] tweenr_1.0.2 fgsea_1.20.0 fastmap_1.1.0
## [79] compiler_4.1.0 abind_1.4-5 httpuv_1.6.5
## [82] gt_0.8.0 rJava_1.0-6 GenomeInfoDbData_1.2.7
## [85] prodlim_2019.11.13 deldir_1.0-6 utf8_1.2.2
## [88] later_1.3.0 recipes_1.0.1 Quandl_2.11.0
## [91] survivalAnalysis_0.3.0 jsonlite_1.8.3 scales_1.2.0
## [94] tidytree_0.4.1 carData_3.0-5 lazyeval_0.2.2
## [97] promises_1.2.0.1 car_3.1-1 doParallel_1.0.17
## [100] latticeExtra_0.6-30 checkmate_2.1.0 rmarkdown_2.14
## [103] klaR_1.7-0 downloader_0.4 treemap_2.4-3
## [106] igraph_1.3.1 yaml_2.3.6 gtsummary_1.6.2
## [109] systemfonts_1.0.4 htmltools_0.5.3 memoise_2.0.1
## [112] graphlayouts_0.8.0 quadprog_1.5-8 viridisLite_0.4.1
## [115] digest_0.6.29 assertthat_0.2.1 mime_0.12
## [118] Rttf2pt1_1.3.10 KMsurv_0.1-5 RSQLite_2.2.13
## [121] yulab.utils_0.0.4 future.apply_1.9.0 blob_1.2.3
## [124] tidytidbits_0.3.2 survMisc_0.5.6 labeling_0.4.2
## [127] splines_4.1.0 Formula_1.2-4 RCurl_1.98-1.6
## [130] broom_1.0.1 hms_1.1.2 modelr_0.1.8
## [133] colorspace_2.0-3 base64enc_0.1-3 GenomicRanges_1.46.1
## [136] shape_1.4.6 aplot_0.1.4 nnet_7.3-17
## [139] sass_0.4.1 Rcpp_1.0.9 circlize_0.4.14
## [142] enrichplot_1.14.2 fansi_1.0.3 tzdb_0.3.0
## [145] parallelly_1.32.1 ModelMetrics_1.2.2.2 R6_2.5.1
## [148] lifecycle_1.0.1 labelled_2.9.1 zip_2.2.0
## [151] curl_5.2.0 ggsignif_0.6.3 jquerylib_0.1.4
## [154] DO.db_2.9 Matrix_1.4-0 qvalue_2.26.0
## [157] iterators_1.0.14 gower_1.0.0 htmlwidgets_1.5.4
## [160] polyclip_1.10-0 network_1.17.1 shadowtext_0.1.2
## [163] gridGraphics_0.5-1 rvest_1.0.2 ComplexHeatmap_2.10.0
## [166] globals_0.15.1 htmlTable_2.4.1 patchwork_1.1.2.9000
## [169] codetools_0.2-18 matrixStats_0.62.0 gtools_3.9.3
## [172] dbplyr_2.1.1 gridBase_0.4-7 GenomeInfoDb_1.30.1
## [175] gtable_0.3.1 DBI_1.1.2 highr_0.9
## [178] ggfun_0.0.6 httr_1.4.7 stringi_1.7.8
## [181] farver_2.1.1 uuid_1.1-0 viridis_0.6.2
## [184] timeDate_4021.104 ggtree_3.2.1 jamba_0.0.87.900
## [187] combinat_0.0-8 xml2_1.3.3 interp_1.1-3
## [190] ggplotify_0.1.0 bit_4.0.5 scatterpie_0.1.7
## [193] jpeg_0.1-9 pkgconfig_2.0.3 knitr_1.41